Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 03:02:38 -05:00)

Compare commits: silero-vad ... v2.25.0 (220 commits)

Commit SHA1s:
07655c0c2e bebfd19b45 6e34430d99 0d08aaa29b 66f9c06e7d 775adf871f a0fc19a3d6 7bd18662a7
95b0739906 cad7e9a1cd 4426efab05 6765b17acd ae1340d59b fc52f179fe 4f43a9a162 20edd44463
1a4f9d8453 f2dd33b8f4 25e988868c ab344e4f47 fac7893dd6 9be338cfe4 b4d4f96919 8cc2d01caa
bf37eebecb 3f0850b58b 2ffa89b8b9 d43adc0205 78b34505ab e55a1bed59 0d7550ad54 b5992255ac
e845cc0401 a10033e8a4 6c6d840e6b a8b3b3d6f4 ec66f7e3b1 05841c2435 c553d73748 1006e8a2ed
9bcfda171b baee4f7bd5 286dc32fe0 36e4c0fcf0 3c21c8789a d9facbcee9 930280ecac 3415e6ae74
f1082f3c6d f345f7a795 1a2a7a57b3 ae80a2bd24 c30ecdd535 f16c7cef92 e1dd78bcea 25acb0cbbc
7674c80bb6 e044970a5b 639526d207 998ff9fa22 7122c7472e 671381267a d1762e098e 270d33504b
9b0983d027 afd0af987d 58524d40c9 2a7222c6aa 0093985e7c 7f51e2dddf f3bbdef77d 9cbf168dc0
9572f0577b 1a14c7d45a 5c29e0cd4d 1a74af1492 8f6332ab23 816ae7a53a 1d630e4185 bc8dd3ad14
b969053701 60bf7c9dd7 d65c10cee7 6c71698299 c7c275c7c8 d0adbee75d 159a7f6df2 0eb2911aad
cab9f88ca4 a3b675b09e 6477913e8f 138cd97ce7 4dd9ac39b0 23499ddc8a 8864156300 478014ca18
d45477b003 396fb88e33 a429ec1b3f 5b5fb9c22a 801a87c3a6 badbd212f7 c4bbecc4d6 8a08e9ec67
61e486dbf5 f2f387e1dd 3be9a08fc9 b325807c60 ae9855a39e 9ac62b589f d12660a286 3d3bd2d10f
b656d10556 8c67f38ef6 4623728cd7 5f804aa6e8 f52c6e3a31 0b4bb7a562 2bc4b56a79 fc920cc58a
fdb560b8e5 708cba0c1b 24abf568cb 7ca0e2d925 037e8030bf 472d11f884 b40d5d12b7 6938618e30
5d9c530eaa 9429a53db7 1d6d301370 8f2be82667 cca911f3e5 e37bbbaacc 59cbf38b4b 432c31d904
af33483687 5051074845 fc4a714992 0429e00746 73f1f25b9a 044570fa85 37527420de 1854b8c612
b8824f2ad9 3ab83e91df f2cb261797 c85f46a71d 75b283d83c 1918efdfdd ec239a0cd0 b74a936178
de1ddb8ba6 272763f625 3aff87a5cf 885118e863 a03a9b9e51 f45d6c746a 5eceb5f67c a9c0dd3a1e
fb17e737f0 b5a21202ed e147f1bd3e 61839efed2 a0fe050055 f943c4b803 cea5a0ea42 f5e1527a5a
7184ca546f 5592f5e820 d4c1746c7d 88737e1d76 ba225f660b 3127cd1352 b90d78d9f6 b86a3e4fa6
be907d993f ab0f8648a3 c226149503 4a079f893c 87b7648591 cf4f024420 3c0ac49d90 4307ae5d52
50f71f73d7 dc04a43868 cc04b62d3a feb54e65c2 44a5dac312 074b52bbfe 236a60bab8 7b70f0543b
5f33962932 45b91d501e e51792784a 28594336e9 9c9359fc96 bc5d1f255b 0fcefbc168 9044b17e4d
ad31daf03b 1167487f5e 61358e4d35 2c8a87b1e4 55aad5f525 58ff47de26 0d6c3a7d57 e001fada6c
f4547fcf8a 7b75e9de2d cbedf2f428 0597f3b9e9 5f688d7a8d fa20628b3a 13bf048cfc bdd6920910
3c3050f68e 1688ba7f2a e8128a339a 369110e6bf
.env (9 changes)

@@ -82,6 +82,15 @@
# Enable to allow p2p mode
# LOCALAI_P2P=true

# Enable to use federated mode
# LOCALAI_FEDERATED=true

# Enable to start federation server
# FEDERATED_SERVER=true

# Define to use federation token
# TOKEN=""

### Watchdog settings
###
# Enables watchdog to kill backends that are inactive for too much time
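For illustration only, the federation options added above can also be supplied as environment variables when starting LocalAI. This is a minimal sketch; the container image, tag, port mapping, and token value are assumptions, not part of this change:

```sh
# Sketch: enable p2p and federated mode via the new environment variables
# (image name/tag and port are assumptions; adjust to your deployment)
docker run -p 8080:8080 \
  -e LOCALAI_P2P=true \
  -e LOCALAI_FEDERATED=true \
  -e TOKEN="<federation-token>" \
  localai/localai:latest
```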
.github/labeler.yml (vendored, 9 changes)

@@ -1,6 +1,15 @@
enhancements:
- head-branch: ['^feature', 'feature']

dependencies:
- any:
- changed-files:
- any-glob-to-any-file: 'Makefile'
- changed-files:
- any-glob-to-any-file: '*.mod'
- changed-files:
- any-glob-to-any-file: '*.sum'

kind/documentation:
- any:
- changed-files:
.github/workflows/bump_deps.yaml (vendored, 17 changes)

@@ -12,23 +12,14 @@ jobs:
- repository: "ggerganov/llama.cpp"
variable: "CPPLLAMA_VERSION"
branch: "master"
- repository: "go-skynet/go-ggml-transformers.cpp"
variable: "GOGGMLTRANSFORMERS_VERSION"
branch: "master"
- repository: "donomii/go-rwkv.cpp"
variable: "RWKV_VERSION"
branch: "main"
- repository: "ggerganov/whisper.cpp"
variable: "WHISPER_CPP_VERSION"
branch: "master"
- repository: "go-skynet/go-bert.cpp"
variable: "BERT_VERSION"
branch: "master"
- repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION"
- repository: "PABannier/bark.cpp"
variable: "BARKCPP_VERSION"
branch: "main"
- repository: "mudler/go-ggllm.cpp"
variable: "GOGGLLM_VERSION"
- repository: "leejet/stable-diffusion.cpp"
variable: "STABLEDIFFUSION_GGML_VERSION"
branch: "master"
- repository: "mudler/go-stable-diffusion"
variable: "STABLEDIFFUSION_VERSION"
.github/workflows/image.yml (vendored, 47 changes)

@@ -280,6 +280,7 @@ jobs:
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
skip-drivers: ${{ matrix.skip-drivers }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -301,6 +302,7 @@ jobs:
latest-image: 'latest-cpu'
latest-image-aio: 'latest-aio-cpu'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
@@ -312,6 +314,7 @@ jobs:
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
@@ -323,6 +326,7 @@ jobs:
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
@@ -334,6 +338,7 @@ jobs:
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
@@ -344,6 +349,7 @@ jobs:
image-type: 'core'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
@@ -354,4 +360,45 @@ jobs:
image-type: 'core'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
# parallel-builds:
# uses: ./.github/workflows/image_build.yml
# with:
# tag-latest: ${{ matrix.tag-latest }}
# tag-suffix: ${{ matrix.tag-suffix }}
# ffmpeg: ${{ matrix.ffmpeg }}
# image-type: ${{ matrix.image-type }}
# build-type: ${{ matrix.build-type }}
# cuda-major-version: ${{ matrix.cuda-major-version }}
# cuda-minor-version: ${{ matrix.cuda-minor-version }}
# platforms: ${{ matrix.platforms }}
# runs-on: ${{ matrix.runs-on }}
# aio: ${{ matrix.aio }}
# base-image: ${{ matrix.base-image }}
# grpc-base-image: ${{ matrix.grpc-base-image }}
# makeflags: ${{ matrix.makeflags }}
# latest-image: ${{ matrix.latest-image }}
# latest-image-aio: ${{ matrix.latest-image-aio }}
# skip-drivers: ${{ matrix.skip-drivers }}
# secrets:
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
# strategy:
# matrix:
# include:
# - build-type: 'cublas'
# cuda-major-version: "12"
# cuda-minor-version: "0"
# platforms: 'linux/arm64'
# tag-latest: 'false'
# tag-suffix: '-nvidia-l4t-arm64-core'
# latest-image: 'latest-nvidia-l4t-arm64-core'
# ffmpeg: 'true'
# image-type: 'core'
# base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
# runs-on: 'self-hosted'
# makeflags: "--jobs=4 --output-sync=target"
# skip-drivers: 'true'
.github/workflows/image_build.yml (vendored, 6 changes)

@@ -49,6 +49,10 @@ on:
description: 'FFMPEG'
default: ''
type: string
skip-drivers:
description: 'Skip drivers by default'
default: 'false'
type: string
image-type:
description: 'Image type'
default: ''
@@ -234,6 +238,7 @@ jobs:
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0
MAKEFLAGS=${{ inputs.makeflags }}
SKIP_DRIVERS=${{ inputs.skip-drivers }}
context: .
file: ./Dockerfile
cache-from: type=gha
@@ -262,6 +267,7 @@ jobs:
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0
MAKEFLAGS=${{ inputs.makeflags }}
SKIP_DRIVERS=${{ inputs.skip-drivers }}
context: .
file: ./Dockerfile
cache-from: type=gha
.gitignore (vendored, 1 change)

@@ -2,6 +2,7 @@
/sources/
__pycache__/
*.a
*.o
get-sources
prepare-sources
/backend/cpp/llama/grpc-server
Dockerfile

@@ -115,12 +115,13 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=0
ARG SKIP_DRIVERS=false

ENV BUILD_TYPE=${BUILD_TYPE}

# Vulkan requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "vulkan" ]; then
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
@@ -136,7 +137,7 @@ EOT

# CuBLAS requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ]; then
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
@@ -162,7 +163,7 @@ RUN <<EOT bash
EOT

# If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
libclblast-dev && \
@@ -170,7 +171,7 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
rm -rf /var/lib/apt/lists/* \
; fi

RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
hipblas-dev \
Makefile (104 changes)

@@ -8,16 +8,12 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445
CPPLLAMA_VERSION?=ba8a1f9c5b675459c55a83e3f97f10df3a66c788

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4

# go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
@@ -30,6 +26,14 @@ STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057

# bark.cpp
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
BARKCPP_VERSION?=v1.0.0

# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a

ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
ONNX_OS?=linux
@@ -198,7 +202,6 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
endif

ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
@@ -206,6 +209,14 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper

ifeq ($(ONNX_OS),linux)
ifeq ($(ONNX_ARCH),x64)
ALL_GRPC_BACKENDS+=backend-assets/grpc/bark-cpp
ALL_GRPC_BACKENDS+=backend-assets/grpc/stablediffusion-ggml
endif
endif

ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
@@ -228,19 +239,6 @@ endif

all: help

## BERT embeddings
sources/go-bert.cpp:
mkdir -p sources/go-bert.cpp
cd sources/go-bert.cpp && \
git init && \
git remote add origin $(BERT_REPO) && \
git fetch origin && \
git checkout $(BERT_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch

sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
$(MAKE) -C sources/go-bert.cpp libgobert.a

## go-llama.cpp
sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp
@@ -254,6 +252,23 @@ sources/go-llama.cpp:
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a

## bark.cpp
sources/bark.cpp:
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
cd sources/bark.cpp && \
git checkout $(BARKCPP_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch

sources/bark.cpp/build/libbark.a: sources/bark.cpp
cd sources/bark.cpp && \
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) .. && \
cmake --build . --config Release

backend/go/bark/libbark.a: sources/bark.cpp/build/libbark.a
$(MAKE) -C backend/go/bark libbark.a

## go-piper
sources/go-piper:
mkdir -p sources/go-piper
@@ -267,7 +282,7 @@ sources/go-piper:
sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o

## stable diffusion
## stable diffusion (onnx)
sources/go-stable-diffusion:
mkdir -p sources/go-stable-diffusion
cd sources/go-stable-diffusion && \
@@ -280,6 +295,30 @@ sources/go-stable-diffusion:
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a

## stablediffusion (ggml)
sources/stablediffusion-ggml.cpp:
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
cd sources/stablediffusion-ggml.cpp && \
git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch

sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a: sources/stablediffusion-ggml.cpp
cd sources/stablediffusion-ggml.cpp && \
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) .. && \
cmake --build . --config Release

backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a

backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ LIBRARY_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion-ggml ./backend/go/image/stablediffusion-ggml/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion-ggml
endif

sources/onnxruntime:
mkdir -p sources/onnxruntime
curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
@@ -320,12 +359,11 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a

get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp

replace:
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
@@ -334,7 +372,6 @@ replace:
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
@@ -349,7 +386,6 @@ rebuild: ## Rebuilds the project
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
$(MAKE) -C sources/go-bert.cpp clean
$(MAKE) -C sources/go-piper clean
$(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build
@@ -364,7 +400,9 @@ clean: ## Remove build related file
rm -rf release/
rm -rf backend-assets/*
$(MAKE) -C backend/cpp/grpc clean
$(MAKE) -C backend/go/bark clean
$(MAKE) -C backend/cpp/llama clean
$(MAKE) -C backend/go/image/stablediffusion-ggml clean
rm -rf backend/cpp/llama-* || true
$(MAKE) dropreplace
$(MAKE) protogen-clean
@@ -707,13 +745,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
backend-assets/grpc: protogen-go replace
mkdir -p backend-assets/grpc

backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif

backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
@@ -773,10 +804,6 @@ backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/ll
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
# TODO: every binary should have its own folder instead, so can have different metal implementations
ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama-fallback/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
endif

backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
cp -rf backend/cpp/llama backend/cpp/llama-cuda
@@ -824,6 +851,13 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/llama-ggml
endif

backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bark-cpp
endif

backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
README.md

@@ -92,6 +92,8 @@ local-ai run oci://localai/phi-2:latest

## 📰 Latest project news

- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
@@ -124,10 +126,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl

## 🚀 [Features](https://localai.io/features/)

- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
- 🎨 [Image generation](https://localai.io/features/image-generation)
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
@@ -135,6 +137,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- 🔊 Voice activity detection (Silero-VAD support)
- 🌍 Integrated WebUI!

## 💻 Usage
@@ -157,6 +160,7 @@ Model galleries
Other:
- Helm chart https://github.com/go-skynet/helm-charts
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
- Langchain: https://python.langchain.com/docs/integrations/providers/localai/
- Terminal utility https://github.com/djcopley/ShellOracle
- Local Smart assistant https://github.com/mudler/LocalAGI
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
@@ -1,7 +1,7 @@
name: text-embedding-ada-002
backend: bert-embeddings
embeddings: true
parameters:
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf

usage: |
You can test this model with curl like this:
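The usage snippet in this config is truncated above. For illustration only, a request against LocalAI's OpenAI-compatible embeddings endpoint could look like the following sketch; the host and port are assumptions and not part of the diff:

```sh
# Sketch: query the embeddings endpoint for the model defined above
curl http://localhost:8080/embeddings \
  -H "Content-Type: application/json" \
  -d '{"model": "text-embedding-ada-002", "input": "Your text string goes here"}'
```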
@@ -240,6 +240,11 @@ message ModelOptions {

repeated string LoraAdapters = 60;
repeated float LoraScales = 61;

repeated string Options = 62;

string CacheTypeKey = 63;
string CacheTypeValue = 64;
}

message Result {
@@ -30,9 +30,7 @@ else ifeq ($(OS),Darwin)
CMAKE_ARGS+=-DGGML_METAL=OFF
else
CMAKE_ARGS+=-DGGML_METAL=ON
# Until this is tested properly, we disable embedded metal file
# as we already embed it as part of the LocalAI assets
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
TARGET+=--target ggml-metal
endif
endif
@@ -492,8 +492,8 @@ struct llama_server_context
}

common_init_result common_init = common_init_from_params(params);
model = common_init.model;
ctx = common_init.context;
model = common_init.model.release();
ctx = common_init.context.release();
if (model == nullptr)
{
LOG_ERR("unable to load model: %s", params.model.c_str());
@@ -681,7 +681,6 @@ struct llama_server_context
slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
@@ -1213,13 +1212,12 @@ struct llama_server_context
{"mirostat", slot.sparams.mirostat},
{"mirostat_tau", slot.sparams.mirostat_tau},
{"mirostat_eta", slot.sparams.mirostat_eta},
{"penalize_nl", slot.sparams.penalize_nl},
{"stop", slot.params.antiprompt},
{"n_predict", slot.params.n_predict},
{"n_keep", params.n_keep},
{"ignore_eos", slot.sparams.ignore_eos},
{"stream", slot.params.stream},
// {"logit_bias", slot.sparams.logit_bias},
// {"logit_bias", slot.sparams.logit_bias},
{"n_probs", slot.sparams.n_probs},
{"min_keep", slot.sparams.min_keep},
{"grammar", slot.sparams.grammar},
@@ -2112,7 +2110,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
// slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
// slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
// slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
// slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
// slot->params.seed = json_value(data, "seed", default_params.seed);
// slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
@@ -2135,7 +2132,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["mirostat"] = predict->mirostat();
data["mirostat_tau"] = predict->mirostattau();
data["mirostat_eta"] = predict->mirostateta();
data["penalize_nl"] = predict->penalizenl();
data["n_keep"] = predict->nkeep();
data["seed"] = predict->seed();
data["grammar"] = predict->grammar();
@@ -2181,7 +2177,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// llama.params.sparams.mirostat = predict->mirostat();
// llama.params.sparams.mirostat_tau = predict->mirostattau();
// llama.params.sparams.mirostat_eta = predict->mirostateta();
// llama.params.sparams.penalize_nl = predict->penalizenl();
// llama.params.n_keep = predict->nkeep();
// llama.params.seed = predict->seed();
// llama.params.sparams.grammar = predict->grammar();
@@ -2228,6 +2223,35 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// }
// }

const std::vector<ggml_type> kv_cache_types = {
GGML_TYPE_F32,
GGML_TYPE_F16,
GGML_TYPE_BF16,
GGML_TYPE_Q8_0,
GGML_TYPE_Q4_0,
GGML_TYPE_Q4_1,
GGML_TYPE_IQ4_NL,
GGML_TYPE_Q5_0,
GGML_TYPE_Q5_1,
};

static ggml_type kv_cache_type_from_str(const std::string & s) {
for (const auto & type : kv_cache_types) {
if (ggml_type_name(type) == s) {
return type;
}
}
throw std::runtime_error("Unsupported cache type: " + s);
}

static std::string get_all_kv_cache_types() {
std::ostringstream msg;
for (const auto & type : kv_cache_types) {
msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? "" : ", ");
}
return msg.str();
}

static void params_parse(const backend::ModelOptions* request,
common_params & params) {

@@ -2241,6 +2265,12 @@ static void params_parse(const backend::ModelOptions* request,
}
// params.model_alias ??
params.model_alias = request->modelfile();
if (!request->cachetypekey().empty()) {
params.cache_type_k = kv_cache_type_from_str(request->cachetypekey());
}
if (!request->cachetypevalue().empty()) {
params.cache_type_v = kv_cache_type_from_str(request->cachetypevalue());
}
params.n_ctx = request->contextsize();
//params.memory_f16 = request->f16memory();
params.cpuparams.n_threads = request->threads();
@@ -1,13 +1,13 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 342042ff..224db9b5 100644
index 3cd0d2fa..6c5e811a 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
int* patches_data = (int*)malloc(ggml_nbytes(patches));
for (int i = 0; i < num_patches; i++) {
- patches_data[i] = i + 1;
+ patches_data[i] = i;
}
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
free(patches_data);
@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
int* patches_data = (int*)malloc(ggml_nbytes(patches));
for (int i = 0; i < num_patches; i++) {
- patches_data[i] = i + 1;
+ patches_data[i] = i;
}
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
free(patches_data);
backend/go/bark/Makefile (new file, 25 lines)

@@ -0,0 +1,25 @@
INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./)

AR?=ar

BUILD_TYPE?=
# keep standard at C11 and C++11
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm

# warnings
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function

gobark.o:
$(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS)

libbark.a: gobark.o
cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./
$(AR) rcs libbark.a gobark.o
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o

clean:
rm -f gobark.o libbark.a
backend/go/bark/gobark.cpp (new file, 85 lines)

@@ -0,0 +1,85 @@
#include <iostream>
#include <tuple>

#include "bark.h"
#include "gobark.h"
#include "common.h"
#include "ggml.h"

struct bark_context *c;

void bark_print_progress_callback(struct bark_context *bctx, enum bark_encoding_step step, int progress, void *user_data) {
if (step == bark_encoding_step::SEMANTIC) {
printf("\rGenerating semantic tokens... %d%%", progress);
} else if (step == bark_encoding_step::COARSE) {
printf("\rGenerating coarse tokens... %d%%", progress);
} else if (step == bark_encoding_step::FINE) {
printf("\rGenerating fine tokens... %d%%", progress);
}
fflush(stdout);
}

int load_model(char *model) {
// initialize bark context
struct bark_context_params ctx_params = bark_context_default_params();
bark_params params;

params.model_path = model;

// ctx_params.verbosity = verbosity;
ctx_params.progress_callback = bark_print_progress_callback;
ctx_params.progress_callback_user_data = nullptr;

struct bark_context *bctx = bark_load_model(params.model_path.c_str(), ctx_params, params.seed);
if (!bctx) {
fprintf(stderr, "%s: Could not load model\n", __func__);
return 1;
}

c = bctx;

return 0;
}

int tts(char *text,int threads, char *dst ) {

ggml_time_init();
const int64_t t_main_start_us = ggml_time_us();

// generate audio
if (!bark_generate_audio(c, text, threads)) {
fprintf(stderr, "%s: An error occured. If the problem persists, feel free to open an issue to report it.\n", __func__);
return 1;
}

const float *audio_data = bark_get_audio_data(c);
if (audio_data == NULL) {
fprintf(stderr, "%s: Could not get audio data\n", __func__);
return 1;
}

const int audio_arr_size = bark_get_audio_data_size(c);

std::vector<float> audio_arr(audio_data, audio_data + audio_arr_size);

write_wav_on_disk(audio_arr, dst);

// report timing
{
const int64_t t_main_end_us = ggml_time_us();
const int64_t t_load_us = bark_get_load_time(c);
const int64_t t_eval_us = bark_get_eval_time(c);

printf("\n\n");
printf("%s: load time = %8.2f ms\n", __func__, t_load_us / 1000.0f);
printf("%s: eval time = %8.2f ms\n", __func__, t_eval_us / 1000.0f);
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f);
}

return 0;
}

int unload() {
bark_free(c);
}
backend/go/bark/gobark.go (new file, 52 lines)

@@ -0,0 +1,52 @@
package main

// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon
// #include <gobark.h>
// #include <stdlib.h>
import "C"

import (
"fmt"
"unsafe"

"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

type Bark struct {
base.SingleThread
threads int
}

func (sd *Bark) Load(opts *pb.ModelOptions) error {

sd.threads = int(opts.Threads)

modelFile := C.CString(opts.ModelFile)
defer C.free(unsafe.Pointer(modelFile))

ret := C.load_model(modelFile)
if ret != 0 {
return fmt.Errorf("inference failed")
}

return nil
}

func (sd *Bark) TTS(opts *pb.TTSRequest) error {
t := C.CString(opts.Text)
defer C.free(unsafe.Pointer(t))

dst := C.CString(opts.Dst)
defer C.free(unsafe.Pointer(dst))

threads := C.int(sd.threads)

ret := C.tts(t, threads, dst)
if ret != 0 {
return fmt.Errorf("inference failed")
}

return nil
}
backend/go/bark/gobark.h (new file, 8 lines)

@@ -0,0 +1,8 @@
#ifdef __cplusplus
extern "C" {
#endif
int load_model(char *model);
int tts(char *text,int threads, char *dst );
#ifdef __cplusplus
}
#endif
@@ -1,7 +1,6 @@
package main

// Note: this is started internally by LocalAI and a server is allocated for each model

import (
"flag"

@@ -15,7 +14,7 @@ var (
func main() {
flag.Parse()

if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
if err := grpc.StartServer(*addr, &Bark{}); err != nil {
panic(err)
}
}
backend/go/image/stablediffusion-ggml/Makefile (new file, 21 lines)

@@ -0,0 +1,21 @@
INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./)

AR?=ar

BUILD_TYPE?=
# keep standard at C11 and C++11
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC

# warnings
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function

gosd.o:
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c

libsd.a: gosd.o
cp $(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a ./libsd.a
$(AR) rcs libsd.a gosd.o

clean:
rm -f gosd.o libsd.a
backend/go/image/stablediffusion-ggml/gosd.cpp (new file, 228 lines)

@@ -0,0 +1,228 @@
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <iostream>
#include <random>
#include <string>
#include <vector>
#include "gosd.h"

// #include "preprocessing.hpp"
#include "flux.hpp"
#include "stable-diffusion.h"

#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_STATIC
#include "stb_image.h"

#define STB_IMAGE_WRITE_IMPLEMENTATION
#define STB_IMAGE_WRITE_STATIC
#include "stb_image_write.h"

#define STB_IMAGE_RESIZE_IMPLEMENTATION
#define STB_IMAGE_RESIZE_STATIC
#include "stb_image_resize.h"

// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
const char* sample_method_str[] = {
"euler_a",
"euler",
"heun",
"dpm2",
"dpm++2s_a",
"dpm++2m",
"dpm++2mv2",
"ipndm",
"ipndm_v",
"lcm",
};

// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
const char* schedule_str[] = {
"default",
"discrete",
"karras",
"exponential",
"ays",
"gits",
};

sd_ctx_t* sd_c;

sample_method_t sample_method;

int load_model(char *model, char* options[], int threads, int diff) {
fprintf (stderr, "Loading model!\n");

char *stableDiffusionModel = "";
if (diff == 1 ) {
stableDiffusionModel = model;
model = "";
}

// decode options. Options are in form optname:optvale, or if booleans only optname.
char *clip_l_path = "";
char *clip_g_path = "";
char *t5xxl_path = "";
char *vae_path = "";
char *scheduler = "";
char *sampler = "";

// If options is not NULL, parse options
for (int i = 0; options[i] != NULL; i++) {
char *optname = strtok(options[i], ":");
char *optval = strtok(NULL, ":");
if (optval == NULL) {
optval = "true";
}

if (!strcmp(optname, "clip_l_path")) {
clip_l_path = optval;
}
if (!strcmp(optname, "clip_g_path")) {
clip_g_path = optval;
}
if (!strcmp(optname, "t5xxl_path")) {
t5xxl_path = optval;
}
if (!strcmp(optname, "vae_path")) {
vae_path = optval;
}
if (!strcmp(optname, "scheduler")) {
scheduler = optval;
}
if (!strcmp(optname, "sampler")) {
sampler = optval;
}
}

int sample_method_found = -1;
for (int m = 0; m < N_SAMPLE_METHODS; m++) {
if (!strcmp(sampler, sample_method_str[m])) {
sample_method_found = m;
}
}
if (sample_method_found == -1) {
fprintf(stderr, "Invalid sample method, default to EULER_A!\n");
sample_method_found = EULER_A;
}
sample_method = (sample_method_t)sample_method_found;

int schedule_found = -1;
for (int d = 0; d < N_SCHEDULES; d++) {
if (!strcmp(scheduler, schedule_str[d])) {
schedule_found = d;
fprintf (stderr, "Found scheduler: %s\n", scheduler);

}
}

if (schedule_found == -1) {
fprintf (stderr, "Invalid scheduler! using DEFAULT\n");
schedule_found = DEFAULT;
}

schedule_t schedule = (schedule_t)schedule_found;

fprintf (stderr, "Creating context\n");
sd_ctx_t* sd_ctx = new_sd_ctx(model,
clip_l_path,
clip_g_path,
t5xxl_path,
stableDiffusionModel,
vae_path,
"",
"",
"",
"",
"",
false,
false,
false,
threads,
SD_TYPE_COUNT,
STD_DEFAULT_RNG,
schedule,
false,
false,
false,
false);

if (sd_ctx == NULL) {
fprintf (stderr, "failed loading model (generic error)\n");
return 1;
}
fprintf (stderr, "Created context: OK\n");

sd_c = sd_ctx;

return 0;
}

int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale) {

sd_image_t* results;

std::vector<int> skip_layers = {7, 8, 9};

fprintf (stderr, "Generating image\n");

results = txt2img(sd_c,
text,
negativeText,
-1, //clip_skip
cfg_scale, // sfg_scale
3.5f,
width,
height,
sample_method,
steps,
seed,
1,
NULL,
0.9f,
20.f,
false,
"",
skip_layers.data(),
skip_layers.size(),
0,
0.01,
0.2);

if (results == NULL) {
fprintf (stderr, "NO results\n");
return 1;
}

if (results[0].data == NULL) {
fprintf (stderr, "Results with no data\n");
return 1;
}

fprintf (stderr, "Writing PNG\n");

fprintf (stderr, "DST: %s\n", dst);
fprintf (stderr, "Width: %d\n", results[0].width);
fprintf (stderr, "Height: %d\n", results[0].height);
fprintf (stderr, "Channel: %d\n", results[0].channel);
fprintf (stderr, "Data: %p\n", results[0].data);

stbi_write_png(dst, results[0].width, results[0].height, results[0].channel,
results[0].data, 0, NULL);
fprintf (stderr, "Saved resulting image to '%s'\n", dst);

// TODO: free results. Why does it crash?

free(results[0].data);
results[0].data = NULL;
free(results);
fprintf (stderr, "gen_image is done", dst);

return 0;
}

int unload() {
free_sd_ctx(sd_c);
}
backend/go/image/stablediffusion-ggml/gosd.go (new file, 96 lines)

@@ -0,0 +1,96 @@
package main

// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src/ggml-cpu -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src -lsd -lstdc++ -lm -lggml -lggml-base -lggml-cpu -lgomp
// #include <gosd.h>
// #include <stdlib.h>
import "C"

import (
"fmt"
"os"
"path/filepath"
"strings"
"unsafe"

"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/utils"
)

type SDGGML struct {
base.SingleThread
threads int
sampleMethod string
cfgScale float32
}

func (sd *SDGGML) Load(opts *pb.ModelOptions) error {

sd.threads = int(opts.Threads)

modelFile := C.CString(opts.ModelFile)
defer C.free(unsafe.Pointer(modelFile))

var options **C.char
// prepare the options array to pass to C

size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
length := C.size_t(len(opts.Options))
options = (**C.char)(C.malloc(length * size))
view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0:len(opts.Options):len(opts.Options)]

var diffusionModel int

var oo []string
for _, op := range opts.Options {
if op == "diffusion_model" {
diffusionModel = 1
continue
}

// If it's an option path, we resolve absolute path from the model path
if strings.Contains(op, ":") && strings.Contains(op, "path") {
data := strings.Split(op, ":")
data[1] = filepath.Join(opts.ModelPath, data[1])
if err := utils.VerifyPath(data[1], opts.ModelPath); err == nil {
oo = append(oo, strings.Join(data, ":"))
}
} else {
oo = append(oo, op)
}
}

fmt.Fprintf(os.Stderr, "Options: %+v\n", oo)

for i, x := range oo {
view[i] = C.CString(x)
}

sd.cfgScale = opts.CFGScale

ret := C.load_model(modelFile, options, C.int(opts.Threads), C.int(diffusionModel))
if ret != 0 {
return fmt.Errorf("could not load model")
}

return nil
}

func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
t := C.CString(opts.PositivePrompt)
defer C.free(unsafe.Pointer(t))

dst := C.CString(opts.Dst)
defer C.free(unsafe.Pointer(dst))

negative := C.CString(opts.NegativePrompt)
defer C.free(unsafe.Pointer(negative))

ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale))
if ret != 0 {
return fmt.Errorf("inference failed")
}

return nil
}
backend/go/image/stablediffusion-ggml/gosd.h (new file, 8 lines)

@@ -0,0 +1,8 @@
#ifdef __cplusplus
extern "C" {
#endif
int load_model(char *model, char* options[], int threads, int diffusionModel);
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale);
#ifdef __cplusplus
}
#endif
backend/go/image/stablediffusion-ggml/main.go (new file, 20 lines)

@@ -0,0 +1,20 @@
package main

// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"

grpc "github.com/mudler/LocalAI/pkg/grpc"
)

var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)

func main() {
flag.Parse()

if err := grpc.StartServer(*addr, &SDGGML{}); err != nil {
panic(err)
}
}
@@ -1,34 +0,0 @@
package main

// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
bert "github.com/go-skynet/go-bert.cpp"

"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

type Embeddings struct {
base.SingleThread
bert *bert.Bert
}

func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
model, err := bert.New(opts.ModelFile)
llm.bert = model
return err
}

func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {

if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads)))
}

return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads)))
}
@@ -1,5 +1,6 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
intel-extension-for-pytorch==2.3.110+xpu
|
||||
torch==2.3.1+cxx11.abi
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
optimum[openvino]
|
||||
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||
setuptools
|
||||
@@ -1,6 +1,6 @@
|
||||
accelerate
|
||||
auto-gptq==0.7.1
|
||||
grpcio==1.68.0
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,8 +1,9 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
torchaudio
|
||||
intel-extension-for-pytorch==2.3.110+xpu
|
||||
torch==2.3.1+cxx11.abi
|
||||
torchaudio==2.3.1+cxx11.abi
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
optimum[openvino]
|
||||
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||
setuptools
|
||||
transformers
|
||||
accelerate
|
||||
@@ -1,4 +1,4 @@
|
||||
bark==0.1.5
|
||||
grpcio==1.68.0
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
certifi
|
||||
@@ -17,6 +17,9 @@
|
||||
# LIMIT_TARGETS="cublas12"
|
||||
# source $(dirname $0)/../common/libbackend.sh
|
||||
#
|
||||
|
||||
PYTHON_VERSION="3.10"
|
||||
|
||||
function init() {
|
||||
# Name of the backend (directory name)
|
||||
BACKEND_NAME=${PWD##*/}
|
||||
@@ -88,7 +91,7 @@ function getBuildProfile() {
|
||||
# always result in an activated virtual environment
|
||||
function ensureVenv() {
|
||||
if [ ! -d "${EDIR}/venv" ]; then
|
||||
uv venv ${EDIR}/venv
|
||||
uv venv --python ${PYTHON_VERSION} ${EDIR}/venv
|
||||
echo "virtualenv created"
|
||||
fi
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
intel-extension-for-pytorch==2.3.110+xpu
|
||||
torch==2.3.1+cxx11.abi
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
optimum[openvino]
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.68.0
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
grpcio-tools
|
||||
@@ -1,9 +1,10 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
torchaudio
|
||||
intel-extension-for-pytorch==2.3.110+xpu
|
||||
torch==2.3.1+cxx11.abi
|
||||
torchaudio==2.3.1+cxx11.abi
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
optimum[openvino]
|
||||
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||
setuptools
|
||||
transformers
|
||||
accelerate
|
||||
coqui-tts
|
@@ -1,4 +1,4 @@
grpcio==1.68.0
grpcio==1.69.0
protobuf
certifi
packaging==24.1
@@ -1,9 +1,10 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
torch
torchvision
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
torchvision==0.18.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools
diffusers
opencv-python
transformers
@@ -1,5 +1,5 @@
setuptools
grpcio==1.68.0
grpcio==1.69.0
pillow
protobuf
certifi
@@ -1,4 +1,4 @@
grpcio==1.68.0
grpcio==1.69.0
protobuf
certifi
wheel
@@ -1,3 +1,3 @@
grpcio==1.68.0
grpcio==1.69.0
protobuf
certifi
@@ -1,3 +1,7 @@
torch==2.4.1
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2
@@ -1,4 +1,8 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2
@@ -1,3 +1,7 @@
torch==2.4.1
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2
@@ -1,4 +1,8 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2
@@ -1,14 +1,15 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
torch
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
torchaudio==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
grpcio==1.68.0
grpcio==1.69.0
protobuf
librosa==0.9.1
faster-whisper==0.9.0
pydub==0.25.1
wavmark==0.0.3
numpy==1.22.0
eng_to_ipa==0.0.2
inflect==7.0.0
unidecode==1.3.7
@@ -1,20 +1,17 @@
grpcio==1.68.0
grpcio==1.69.0
protobuf
librosa
faster-whisper
pydub==0.25.1
wavmark==0.0.3
numpy==1.22.0
eng_to_ipa==0.0.2
inflect
unidecode
whisper-timestamped
openai
python-dotenv
pypinyin
cn2an==0.5.22
numpy==1.22.0
networkx==2.8.8
jieba==0.42.1
gradio==3.48.0
gradio==5.9.1
langid==1.1.6
llvmlite==0.43.0
setuptools
@@ -1,8 +1,8 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
torch
torchaudio
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
torchaudio==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate
@@ -1,3 +1,4 @@
grpcio==1.68.0
grpcio==1.69.0
certifi
llvmlite==0.43.0
setuptools
@@ -1,8 +1,9 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
intel-extension-for-pytorch==2.3.110+xpu
transformers
accelerate
torch
torch==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
rerankers[transformers]
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools
@@ -1,3 +1,3 @@
grpcio==1.68.0
grpcio==1.69.0
protobuf
certifi
@@ -1,8 +1,9 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
torch
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
setuptools
accelerate
sentence-transformers==3.3.1
transformers
@@ -1,4 +1,4 @@
grpcio==1.68.0
grpcio==1.69.0
protobuf
certifi
datasets
@@ -1,7 +1,8 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
intel-extension-for-pytorch==2.3.110+xpu
transformers
oneccl_bind_pt==2.3.100+xpu
accelerate
torch
torch==2.3.1+cxx11.abi
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools
@@ -1,4 +1,4 @@
grpcio==1.68.0
grpcio==1.69.0
protobuf
scipy==1.14.0
certifi
@@ -1,6 +1,7 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
torch
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
intel-extension-for-transformers
bitsandbytes
@@ -1,4 +1,4 @@
grpcio==1.68.0
grpcio==1.69.0
protobuf
certifi
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
setuptools
@@ -1,7 +1,7 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
intel-extension-for-pytorch==2.3.110+xpu
accelerate
torch
torchaudio
torch==2.3.1+cxx11.abi
torchaudio==2.3.1+cxx11.abi
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
oneccl_bind_pt==2.3.100+xpu
@@ -1,3 +1,4 @@
grpcio==1.68.0
grpcio==1.69.0
protobuf
certifi
certifi
setuptools
@@ -22,7 +22,7 @@ if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
git clone https://github.com/vllm-project/vllm
fi
pushd vllm
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.68.0 protobuf bitsandbytes
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.68.1 protobuf bitsandbytes
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
VLLM_TARGET_DEVICE=cpu python setup.py install
popd

@@ -1,8 +1,9 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
intel-extension-for-pytorch==2.3.110+xpu
accelerate
torch
torch==2.3.1+cxx11.abi
transformers
optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
bitsandbytes
setuptools
bitsandbytes
oneccl_bind_pt==2.3.100+xpu
@@ -1,4 +1,4 @@
grpcio==1.68.0
grpcio==1.69.0
protobuf
certifi
setuptools
@@ -1,38 +0,0 @@
package core

import (
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/services"
	"github.com/mudler/LocalAI/pkg/model"
)

// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
// Perhaps a proper DI system is worth it in the future, but for now keep things simple.
type Application struct {

	// Application-Level Config
	ApplicationConfig *config.ApplicationConfig
	// ApplicationState *ApplicationState

	// Core Low-Level Services
	BackendConfigLoader *config.BackendConfigLoader
	ModelLoader *model.ModelLoader

	// Backend Services
	// EmbeddingsBackendService *backend.EmbeddingsBackendService
	// ImageGenerationBackendService *backend.ImageGenerationBackendService
	// LLMBackendService *backend.LLMBackendService
	// TranscriptionBackendService *backend.TranscriptionBackendService
	// TextToSpeechBackendService *backend.TextToSpeechBackendService

	// LocalAI System Services
	BackendMonitorService *services.BackendMonitorService
	GalleryService *services.GalleryService
	LocalAIMetricsService *services.LocalAIMetricsService
	// OpenAIService *services.OpenAIService
}

// TODO [NEXT PR?]: Break up ApplicationConfig.
// Migrate over stuff that is not set via config at all - especially runtime stuff
type ApplicationState struct {
}
39 core/application/application.go Normal file
@@ -0,0 +1,39 @@
package application

import (
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/pkg/model"
	"github.com/mudler/LocalAI/pkg/templates"
)

type Application struct {
	backendLoader *config.BackendConfigLoader
	modelLoader *model.ModelLoader
	applicationConfig *config.ApplicationConfig
	templatesEvaluator *templates.Evaluator
}

func newApplication(appConfig *config.ApplicationConfig) *Application {
	return &Application{
		backendLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
		modelLoader: model.NewModelLoader(appConfig.ModelPath),
		applicationConfig: appConfig,
		templatesEvaluator: templates.NewEvaluator(appConfig.ModelPath),
	}
}

func (a *Application) BackendLoader() *config.BackendConfigLoader {
	return a.backendLoader
}

func (a *Application) ModelLoader() *model.ModelLoader {
	return a.modelLoader
}

func (a *Application) ApplicationConfig() *config.ApplicationConfig {
	return a.applicationConfig
}

func (a *Application) TemplatesEvaluator() *templates.Evaluator {
	return a.templatesEvaluator
}
@@ -1,4 +1,4 @@
package startup
package application

import (
"encoding/json"
@@ -8,8 +8,8 @@ import (
"path/filepath"
"time"

"github.com/fsnotify/fsnotify"
"dario.cat/mergo"
"github.com/fsnotify/fsnotify"
"github.com/mudler/LocalAI/core/config"
"github.com/rs/zerolog/log"
)
@@ -1,15 +1,15 @@
package startup
package application

import (
"fmt"
"os"

"github.com/mudler/LocalAI/core"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/assets"

"github.com/mudler/LocalAI/pkg/library"
"github.com/mudler/LocalAI/pkg/model"
pkgStartup "github.com/mudler/LocalAI/pkg/startup"
@@ -17,8 +17,9 @@ import (
"github.com/rs/zerolog/log"
)

func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
func New(opts ...config.AppOption) (*Application, error) {
options := config.NewApplicationConfig(opts...)
application := newApplication(options)

log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath)
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
@@ -36,28 +37,28 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode

// Make sure directories exists
if options.ModelPath == "" {
return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty")
return nil, fmt.Errorf("options.ModelPath cannot be empty")
}
err = os.MkdirAll(options.ModelPath, 0750)
if err != nil {
return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err)
return nil, fmt.Errorf("unable to create ModelPath: %q", err)
}
if options.ImageDir != "" {
err := os.MkdirAll(options.ImageDir, 0750)
if err != nil {
return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err)
return nil, fmt.Errorf("unable to create ImageDir: %q", err)
}
}
if options.AudioDir != "" {
err := os.MkdirAll(options.AudioDir, 0750)
if err != nil {
return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err)
return nil, fmt.Errorf("unable to create AudioDir: %q", err)
}
}
if options.UploadDir != "" {
err := os.MkdirAll(options.UploadDir, 0750)
if err != nil {
return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err)
return nil, fmt.Errorf("unable to create UploadDir: %q", err)
}
}

@@ -65,39 +66,36 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
log.Error().Err(err).Msg("error installing models")
}

cl := config.NewBackendConfigLoader(options.ModelPath)
ml := model.NewModelLoader(options.ModelPath)

configLoaderOpts := options.ToConfigLoaderOptions()

if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
if err := application.BackendLoader().LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
log.Error().Err(err).Msg("error loading config files")
}

if options.ConfigFile != "" {
if err := cl.LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil {
if err := application.BackendLoader().LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil {
log.Error().Err(err).Msg("error loading config file")
}
}

if err := cl.Preload(options.ModelPath); err != nil {
if err := application.BackendLoader().Preload(options.ModelPath); err != nil {
log.Error().Err(err).Msg("error downloading models")
}

if options.PreloadJSONModels != "" {
if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil {
return nil, nil, nil, err
return nil, err
}
}

if options.PreloadModelsFromPath != "" {
if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil {
return nil, nil, nil, err
return nil, err
}
}

if options.Debug {
for _, v := range cl.GetAllBackendConfigs() {
for _, v := range application.BackendLoader().GetAllBackendConfigs() {
log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v)
}
}
@@ -123,7 +121,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
go func() {
<-options.Context.Done()
log.Debug().Msgf("Context canceled, shutting down")
err := ml.StopAllGRPC()
err := application.ModelLoader().StopAllGRPC()
if err != nil {
log.Error().Err(err).Msg("error while stopping all grpc backends")
}
@@ -131,12 +129,12 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode

if options.WatchDog {
wd := model.NewWatchDog(
ml,
application.ModelLoader(),
options.WatchDogBusyTimeout,
options.WatchDogIdleTimeout,
options.WatchDogBusy,
options.WatchDogIdle)
ml.SetWatchDog(wd)
application.ModelLoader().SetWatchDog(wd)
go wd.Run()
go func() {
<-options.Context.Done()
@@ -147,7 +145,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode

if options.LoadToMemory != nil {
for _, m := range options.LoadToMemory {
cfg, err := cl.LoadBackendConfigFileByName(m, options.ModelPath,
cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath,
config.LoadOptionDebug(options.Debug),
config.LoadOptionThreads(options.Threads),
config.LoadOptionContextSize(options.ContextSize),
@@ -155,7 +153,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
config.ModelPath(options.ModelPath),
)
if err != nil {
return nil, nil, nil, err
return nil, err
}

log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model)
@@ -163,9 +161,9 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
o := backend.ModelOptions(*cfg, options)

var backendErr error
_, backendErr = ml.Load(o...)
_, backendErr = application.ModelLoader().Load(o...)
if backendErr != nil {
return nil, nil, nil, err
return nil, err
}
}
}
@@ -174,7 +172,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
startWatcher(options)

log.Info().Msg("core/startup process completed!")
return cl, ml, options, nil
return application, nil
}

func startWatcher(options *config.ApplicationConfig) {
@@ -201,32 +199,3 @@ func startWatcher(options *config.ApplicationConfig) {
log.Error().Err(err).Msg("failed creating watcher")
}
}

// In Lieu of a proper DI framework, this function wires up the Application manually.
// This is in core/startup rather than core/state.go to keep package references clean!
func createApplication(appConfig *config.ApplicationConfig) *core.Application {
app := &core.Application{
ApplicationConfig: appConfig,
BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
ModelLoader: model.NewModelLoader(appConfig.ModelPath),
}

var err error

// app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
// app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
// app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
// app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
// app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)

app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
app.GalleryService = services.NewGalleryService(app.ApplicationConfig)
// app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService)

app.LocalAIMetricsService, err = services.NewLocalAIMetricsService()
if err != nil {
log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.")
}

return app
}
@@ -117,8 +117,12 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
ss := ""

var partialRune []byte
err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
partialRune = append(partialRune, chars...)
err := inferenceModel.PredictStream(ctx, opts, func(reply *proto.Reply) {
msg := reply.Message
partialRune = append(partialRune, msg...)

tokenUsage.Prompt = int(reply.PromptTokens)
tokenUsage.Completion = int(reply.Tokens)

for len(partialRune) > 0 {
r, size := utf8.DecodeRune(partialRune)
@@ -132,6 +136,10 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im

partialRune = partialRune[size:]
}

if len(msg) == 0 {
tokenCallback("", tokenUsage)
}
})
return LLMResponse{
Response: ss,

@@ -122,7 +122,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
CUDA: c.CUDA || c.Diffusers.CUDA,
SchedulerType: c.Diffusers.SchedulerType,
PipelineType: c.Diffusers.PipelineType,
CFGScale: c.Diffusers.CFGScale,
CFGScale: c.CFGScale,
LoraAdapter: c.LoraAdapter,
LoraScale: c.LoraScale,
LoraAdapters: c.LoraAdapters,
@@ -132,6 +132,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
IMG2IMG: c.Diffusers.IMG2IMG,
CLIPModel: c.Diffusers.ClipModel,
CLIPSubfolder: c.Diffusers.ClipSubFolder,
Options: c.Options,
CLIPSkip: int32(c.Diffusers.ClipSkip),
ControlNet: c.Diffusers.ControlNet,
ContextSize: int32(ctxSize),
@@ -150,6 +151,8 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
TensorParallelSize: int32(c.TensorParallelSize),
MMProj: c.MMProj,
FlashAttention: c.FlashAttention,
CacheTypeKey: c.CacheTypeK,
CacheTypeValue: c.CacheTypeV,
NoKVOffload: c.NoKVOffloading,
YarnExtFactor: c.YarnExtFactor,
YarnAttnFactor: c.YarnAttnFactor,
@@ -6,12 +6,12 @@ import (
"strings"
"time"

"github.com/mudler/LocalAI/core/application"
cli_api "github.com/mudler/LocalAI/core/cli/api"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/startup"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
@@ -186,16 +186,16 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
}

if r.PreloadBackendOnly {
_, _, _, err := startup.Startup(opts...)
_, err := application.New(opts...)
return err
}

cl, ml, options, err := startup.Startup(opts...)
app, err := application.New(opts...)
if err != nil {
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
}

appHTTP, err := http.App(cl, ml, options)
appHTTP, err := http.API(app)
if err != nil {
log.Error().Err(err).Msg("error during HTTP App construction")
return err

@@ -72,6 +72,8 @@ type BackendConfig struct {

Description string `yaml:"description"`
Usage string `yaml:"usage"`

Options []string `yaml:"options"`
}

type File struct {
@@ -97,16 +99,15 @@ type GRPC struct {
}

type Diffusers struct {
CUDA bool `yaml:"cuda"`
PipelineType string `yaml:"pipeline_type"`
SchedulerType string `yaml:"scheduler_type"`
EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser
ClipSkip int `yaml:"clip_skip"` // Skip every N frames
ClipModel string `yaml:"clip_model"` // Clip model to use
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
ControlNet string `yaml:"control_net"`
CUDA bool `yaml:"cuda"`
PipelineType string `yaml:"pipeline_type"`
SchedulerType string `yaml:"scheduler_type"`
EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser
ClipSkip int `yaml:"clip_skip"` // Skip every N frames
ClipModel string `yaml:"clip_model"` // Clip model to use
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
ControlNet string `yaml:"control_net"`
}

// LLMConfig is a struct that holds the configuration that are
@@ -154,8 +155,10 @@ type LLMConfig struct {
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
MMProj string `yaml:"mmproj"`

FlashAttention bool `yaml:"flash_attention"`
NoKVOffloading bool `yaml:"no_kv_offloading"`
FlashAttention bool `yaml:"flash_attention"`
NoKVOffloading bool `yaml:"no_kv_offloading"`
CacheTypeK string `yaml:"cache_type_k"`
CacheTypeV string `yaml:"cache_type_v"`

RopeScaling string `yaml:"rope_scaling"`
ModelType string `yaml:"type"`
@@ -164,6 +167,8 @@ type LLMConfig struct {
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
YarnBetaFast float32 `yaml:"yarn_beta_fast"`
YarnBetaSlow float32 `yaml:"yarn_beta_slow"`

CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
}

// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
@@ -201,6 +206,8 @@ type TemplateConfig struct {
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`

Multimodal string `yaml:"multimodal"`

JinjaTemplate bool `yaml:"jinja_template"`
}

func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
@@ -26,14 +26,14 @@ const (
type settingsConfig struct {
StopWords []string
TemplateConfig TemplateConfig
RepeatPenalty float64
RepeatPenalty float64
}

// default settings to adopt with a given model family
var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
Gemma: {
RepeatPenalty: 1.0,
StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
TemplateConfig: TemplateConfig{
Chat: "{{.Input }}\n<start_of_turn>model\n",
ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
@@ -200,6 +200,18 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
} else {
log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
}

if cfg.HasTemplate() {
return
}

// identify from well known templates first, otherwise use the raw jinja template
chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
if found {
// try to use the jinja template
cfg.TemplateConfig.JinjaTemplate = true
cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
}
}

func identifyFamily(f *gguf.GGUFFile) familyType {

@@ -12,6 +12,8 @@ import (
"gopkg.in/yaml.v3"
)

const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`

var _ = Describe("Model test", func() {

Context("Downloading", func() {
@@ -47,7 +49,7 @@ var _ = Describe("Model test", func() {

gallery := []GalleryModel{{
Name: "bert",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
}}
out, err := yaml.Marshal(gallery)
Expect(err).ToNot(HaveOccurred())
@@ -66,7 +68,7 @@ var _ = Describe("Model test", func() {
Expect(err).ToNot(HaveOccurred())
Expect(len(models)).To(Equal(1))
Expect(models[0].Name).To(Equal("bert"))
Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"))
Expect(models[0].URL).To(Equal(bertEmbeddingsURL))
Expect(models[0].Installed).To(BeFalse())

err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
@@ -78,7 +80,7 @@ var _ = Describe("Model test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))

models, err = AvailableGalleryModels(galleries, tempdir)
Expect(err).ToNot(HaveOccurred())
@@ -14,10 +14,9 @@ import (
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/http/routes"

"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model"

"github.com/gofiber/contrib/fiberzerolog"
"github.com/gofiber/fiber/v2"
@@ -49,18 +48,18 @@ var embedDirStatic embed.FS
// @in header
// @name Authorization

func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
func API(application *application.Application) (*fiber.App, error) {

fiberCfg := fiber.Config{
Views: renderEngine(),
BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
BodyLimit: application.ApplicationConfig().UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
// We disable the Fiber startup message as it does not conform to structured logging.
// We register a startup log line with connection information in the OnListen hook to keep things user friendly though
DisableStartupMessage: true,
// Override default error handler
}

if !appConfig.OpaqueErrors {
if !application.ApplicationConfig().OpaqueErrors {
// Normally, return errors as JSON responses
fiberCfg.ErrorHandler = func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
@@ -86,9 +85,11 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
}
}

app := fiber.New(fiberCfg)
router := fiber.New(fiberCfg)

app.Hooks().OnListen(func(listenData fiber.ListenData) error {
router.Use(middleware.StripPathPrefix())

router.Hooks().OnListen(func(listenData fiber.ListenData) error {
scheme := "http"
if listenData.TLS {
scheme = "https"
@@ -99,82 +100,82 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi

// Have Fiber use zerolog like the rest of the application rather than it's built-in logger
logger := log.Logger
app.Use(fiberzerolog.New(fiberzerolog.Config{
router.Use(fiberzerolog.New(fiberzerolog.Config{
Logger: &logger,
}))

// Default middleware config

if !appConfig.Debug {
app.Use(recover.New())
if !application.ApplicationConfig().Debug {
router.Use(recover.New())
}

if !appConfig.DisableMetrics {
if !application.ApplicationConfig().DisableMetrics {
metricsService, err := services.NewLocalAIMetricsService()
if err != nil {
return nil, err
}

if metricsService != nil {
app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
app.Hooks().OnShutdown(func() error {
router.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
router.Hooks().OnShutdown(func() error {
return metricsService.Shutdown()
})
}

}
// Health Checks should always be exempt from auth, so register these first
routes.HealthRoutes(app)
routes.HealthRoutes(router)

kaConfig, err := middleware.GetKeyAuthConfig(appConfig)
kaConfig, err := middleware.GetKeyAuthConfig(application.ApplicationConfig())
if err != nil || kaConfig == nil {
return nil, fmt.Errorf("failed to create key auth config: %w", err)
}

// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration
app.Use(v2keyauth.New(*kaConfig))
router.Use(v2keyauth.New(*kaConfig))

if appConfig.CORS {
if application.ApplicationConfig().CORS {
var c func(ctx *fiber.Ctx) error
if appConfig.CORSAllowOrigins == "" {
if application.ApplicationConfig().CORSAllowOrigins == "" {
c = cors.New()
} else {
c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig().CORSAllowOrigins})
}

app.Use(c)
router.Use(c)
}

if appConfig.CSRF {
if application.ApplicationConfig().CSRF {
log.Debug().Msg("Enabling CSRF middleware. Tokens are now required for state-modifying requests")
app.Use(csrf.New())
router.Use(csrf.New())
}

// Load config jsons
utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
utils.LoadConfig(application.ApplicationConfig().UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)

galleryService := services.NewGalleryService(appConfig)
galleryService.Start(appConfig.Context, cl)
galleryService := services.NewGalleryService(application.ApplicationConfig())
galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())

routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig)
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService)
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig)
if !appConfig.DisableWebUI {
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService)
routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
routes.RegisterOpenAIRoutes(router, application)
if !application.ApplicationConfig().DisableWebUI {
routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
}
routes.RegisterJINARoutes(app, cl, ml, appConfig)
routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())

httpFS := http.FS(embedDirStatic)

app.Use(favicon.New(favicon.Config{
router.Use(favicon.New(favicon.Config{
URL: "/favicon.ico",
FileSystem: httpFS,
File: "static/favicon.ico",
}))

app.Use("/static", filesystem.New(filesystem.Config{
router.Use("/static", filesystem.New(filesystem.Config{
Root: httpFS,
PathPrefix: "static",
Browse: true,
@@ -182,7 +183,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi

// Define a custom 404 handler
// Note: keep this at the bottom!
app.Use(notFoundHandler)
router.Use(notFoundHandler)

return app, nil
return router, nil
}
@@ -5,24 +5,21 @@ import (
"context"
"embed"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"runtime"
"strings"

"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/config"
. "github.com/mudler/LocalAI/core/http"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/startup"

"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"gopkg.in/yaml.v3"
@@ -240,6 +237,33 @@ func postInvalidRequest(url string) (error, int) {
return nil, resp.StatusCode
}

func getRequest(url string, header http.Header) (error, int, []byte) {

req, err := http.NewRequest("GET", url, nil)
if err != nil {
return err, -1, nil
}

req.Header = header

client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return err, -1, nil
}

defer resp.Body.Close()

body, err := io.ReadAll(resp.Body)
if err != nil {
return err, -1, nil
}

return nil, resp.StatusCode, body
}

const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`

//go:embed backend-assets/*
var backendAssets embed.FS

@@ -252,9 +276,6 @@ var _ = Describe("API test", func() {
var cancel context.CancelFunc
var tmpdir string
var modelDir string
var bcl *config.BackendConfigLoader
var ml *model.ModelLoader
var applicationConfig *config.ApplicationConfig

commonOpts := []config.AppOption{
config.WithDebug(true),
@@ -279,13 +300,13 @@ var _ = Describe("API test", func() {
g := []gallery.GalleryModel{
{
Name: "bert",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
},
{
Name: "bert2",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
Overrides: map[string]interface{}{"foo": "bar"},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
},
}
out, err := yaml.Marshal(g)
@@ -300,7 +321,7 @@ var _ = Describe("API test", func() {
},
}

bcl, ml, applicationConfig, err = startup.Startup(
application, err := application.New(
append(commonOpts,
config.WithContext(c),
config.WithGalleries(galleries),
@@ -310,7 +331,7 @@ var _ = Describe("API test", func() {
config.WithBackendAssetsOutput(backendAssetsDir))...)
Expect(err).ToNot(HaveOccurred())

app, err = App(bcl, ml, applicationConfig)
app, err = API(application)
Expect(err).ToNot(HaveOccurred())

go app.Listen("127.0.0.1:9090")
@@ -349,6 +370,33 @@ var _ = Describe("API test", func() {
})
})

Context("URL routing Tests", func() {
It("Should support reverse-proxy when unauthenticated", func() {

err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{
"X-Forwarded-Proto": {"https"},
"X-Forwarded-Host": {"example.org"},
"X-Forwarded-Prefix": {"/myprefix/"},
})
Expect(err).To(BeNil(), "error")
Expect(sc).To(Equal(401), "status code")
Expect(string(body)).To(ContainSubstring(`<base href="https://example.org/myprefix/" />`), "body")
})

It("Should support reverse-proxy when authenticated", func() {

err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{
"Authorization": {bearerKey},
"X-Forwarded-Proto": {"https"},
"X-Forwarded-Host": {"example.org"},
"X-Forwarded-Prefix": {"/myprefix/"},
})
Expect(err).To(BeNil(), "error")
Expect(sc).To(Equal(200), "status code")
Expect(string(body)).To(ContainSubstring(`<base href="https://example.org/myprefix/" />`), "body")
})
})

Context("Applying models", func() {

It("applies models from a gallery", func() {
@@ -383,7 +431,7 @@ var _ = Describe("API test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
Expect(content["foo"]).To(Equal("bar"))

models, err = getModels("http://127.0.0.1:9090/models/available")
@@ -402,7 +450,7 @@ var _ = Describe("API test", func() {
It("overrides models", func() {

response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
Name: "bert",
Overrides: map[string]interface{}{
"backend": "llama",
@@ -451,7 +499,7 @@ var _ = Describe("API test", func() {
})
It("apply models without overrides", func() {
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
Name: "bert",
Overrides: map[string]interface{}{},
})
@@ -471,7 +519,7 @@ var _ = Describe("API test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
})

It("runs openllama(llama-ggml backend)", Label("llama"), func() {
@@ -539,7 +587,7 @@ var _ = Describe("API test", func() {
var res map[string]string
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))

@@ -641,7 +689,7 @@ var _ = Describe("API test", func() {
},
}

bcl, ml, applicationConfig, err = startup.Startup(
application, err := application.New(
append(commonOpts,
config.WithContext(c),
config.WithAudioDir(tmpdir),
@@ -652,7 +700,7 @@ var _ = Describe("API test", func() {
config.WithBackendAssetsOutput(tmpdir))...,
)
Expect(err).ToNot(HaveOccurred())
app, err = App(bcl, ml, applicationConfig)
app, err = API(application)
Expect(err).ToNot(HaveOccurred())

go app.Listen("127.0.0.1:9090")
@@ -708,7 +756,7 @@ var _ = Describe("API test", func() {
Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))

Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat)))
Expect(resp.Header.Get("Content-Type")).To(Equal("audio/x-wav"))
Expect(resp.Header.Get("Content-Type")).To(Or(Equal("audio/x-wav"), Equal("audio/vnd.wave")))
})
It("installs and is capable to generate images", Label("stablediffusion"), func() {
if runtime.GOOS != "linux" {
@@ -772,14 +820,14 @@ var _ = Describe("API test", func() {

var err error

bcl, ml, applicationConfig, err = startup.Startup(
application, err := application.New(
append(commonOpts,
config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
config.WithContext(c),
config.WithModelPath(modelPath),
)...)
Expect(err).ToNot(HaveOccurred())
app, err = App(bcl, ml, applicationConfig)
app, err = API(application)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")

@@ -806,7 +854,7 @@ var _ = Describe("API test", func() {
It("returns the models list", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
Expect(len(models.Models)).To(Equal(7)) // If "config.yaml" should be included, this should be 8?
})
It("can generate completions via ggml", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
@@ -866,8 +914,8 @@ var _ = Describe("API test", func() {
},
)
Expect(err).ToNot(HaveOccurred(), err)
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 2048))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 2048))

sunEmbedding := resp.Data[0].Embedding
resp2, err := client.CreateEmbeddings(
@@ -911,71 +959,6 @@ var _ = Describe("API test", func() {
})
})

Context("backends", func() {
It("runs rwkv completion", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
Expect(resp.Choices[0].Text).To(ContainSubstring("five"))

stream, err := client.CreateCompletionStream(context.TODO(), openai.CompletionRequest{
Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,", Stream: true,
})
Expect(err).ToNot(HaveOccurred())
defer stream.Close()

tokens := 0
text := ""
for {
response, err := stream.Recv()
if errors.Is(err, io.EOF) {
break
}

Expect(err).ToNot(HaveOccurred())
text += response.Choices[0].Text
tokens++
}
Expect(text).ToNot(BeEmpty())
Expect(text).To(ContainSubstring("five"))
Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
})
It("runs rwkv chat completion", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateChatCompletion(context.TODO(),
openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))

stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
defer stream.Close()

tokens := 0
text := ""
for {
response, err := stream.Recv()
if errors.Is(err, io.EOF) {
break
}

Expect(err).ToNot(HaveOccurred())
text += response.Choices[0].Delta.Content
tokens++
}
Expect(text).ToNot(BeEmpty())
Expect(strings.ToLower(text)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))

Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
})
})

// See tests/integration/stores_test
Context("Stores", Label("stores"), func() {

@@ -1055,14 +1038,14 @@ var _ = Describe("API test", func() {
c, cancel = context.WithCancel(context.Background())

var err error
bcl, ml, applicationConfig, err = startup.Startup(
application, err := application.New(
append(commonOpts,
config.WithContext(c),
config.WithModelPath(modelPath),
config.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
)
Expect(err).ToNot(HaveOccurred())
app, err = App(bcl, ml, applicationConfig)
app, err = API(application)
Expect(err).ToNot(HaveOccurred())

go app.Listen("127.0.0.1:9090")
@@ -16,7 +16,7 @@ func installButton(galleryName string) elem.Node {
"class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
"hx-swap": "outerHTML",
// post the Model ID as param
"hx-post": "/browse/install/model/" + galleryName,
"hx-post": "browse/install/model/" + galleryName,
},
elem.I(
attrs.Props{
@@ -36,7 +36,7 @@ func reInstallButton(galleryName string) elem.Node {
"hx-target": "#action-div-" + dropBadChars(galleryName),
"hx-swap": "outerHTML",
// post the Model ID as param
"hx-post": "/browse/install/model/" + galleryName,
"hx-post": "browse/install/model/" + galleryName,
},
elem.I(
attrs.Props{
@@ -80,7 +80,7 @@ func deleteButton(galleryID string) elem.Node {
"hx-target": "#action-div-" + dropBadChars(galleryID),
"hx-swap": "outerHTML",
// post the Model ID as param
"hx-post": "/browse/delete/model/" + galleryID,
"hx-post": "browse/delete/model/" + galleryID,
},
elem.I(
attrs.Props{

@@ -47,7 +47,7 @@ func searchableElement(text, icon string) elem.Node {
// "value": text,
//"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
"href": "#!",
"hx-post": "/browse/search/models",
"hx-post": "browse/search/models",
"hx-target": "#search-results",
// TODO: this doesn't work
// "hx-vals": `{ \"search\": \"` + text + `\" }`,

@@ -64,7 +64,7 @@ func StartProgressBar(uid, progress, text string) string {
return elem.Div(
attrs.Props{
"hx-trigger": "done",
"hx-get": "/browse/job/" + uid,
"hx-get": "browse/job/" + uid,
"hx-swap": "outerHTML",
"hx-target": "this",
},
@@ -77,7 +77,7 @@ func StartProgressBar(uid, progress, text string) string {
},
elem.Text(bluemonday.StrictPolicy().Sanitize(text)), //Perhaps overly defensive
elem.Div(attrs.Props{
"hx-get": "/browse/job/progress/" + uid,
"hx-get": "browse/job/progress/" + uid,
"hx-trigger": "every 600ms",
"hx-target": "this",
"hx-swap": "innerHTML",
@@ -6,6 +6,7 @@ import (

"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/explorer"
"github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/internal"
)

@@ -14,6 +15,7 @@ func Dashboard() func(*fiber.Ctx) error {
summary := fiber.Map{
"Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(),
"BaseURL": utils.BaseURL(c),
}

if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {

@@ -9,6 +9,7 @@ import (
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/rs/zerolog/log"
@@ -82,7 +83,8 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
Galleries: mgs.galleries,
ConfigURL: input.ConfigURL,
}
return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})

return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())})
}
}

@@ -105,7 +107,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
return err
}

return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())})
}
}

@@ -4,6 +4,7 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
@@ -32,6 +33,7 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
summary := fiber.Map{
"Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(),
"BaseURL": utils.BaseURL(c),
"Models": modelsWithoutConfig,
"ModelsConfig": backendConfigs,
"GalleryConfig": galleryConfigs,

@@ -14,6 +14,8 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/templates"

model "github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
@@ -24,7 +26,7 @@ import (
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/chat/completions [post]
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
var id, textContentToReturn string
var created int

@@ -294,148 +296,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// If we are using the tokenizer template, we don't need to process the messages
// unless we are processing functions
if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn {
suppressConfigSystemPrompt := false
mess := []string{}
for messageIndex, i := range input.Messages {
var content string
role := i.Role

// if function call, we might want to customize the role so we can display better that the "assistant called a json action"
// if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
roleFn := "assistant_function_call"
r := config.Roles[roleFn]
if r != "" {
role = roleFn
}
}
r := config.Roles[role]
contentExists := i.Content != nil && i.StringContent != ""

fcall := i.FunctionCall
if len(i.ToolCalls) > 0 {
fcall = i.ToolCalls
}

// First attempt to populate content via a chat message specific template
if config.TemplateConfig.ChatMessage != "" {
chatMessageData := model.ChatMessageTemplateData{
SystemPrompt: config.SystemPrompt,
Role: r,
RoleName: role,
Content: i.StringContent,
FunctionCall: fcall,
FunctionName: i.Name,
LastMessage: messageIndex == (len(input.Messages) - 1),
Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
MessageIndex: messageIndex,
}
templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
if err != nil {
log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping")
} else {
if templatedChatMessage == "" {
log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
}
log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
content = templatedChatMessage
}
}

marshalAnyRole := func(f any) {
j, err := json.Marshal(f)
if err == nil {
if contentExists {
content += "\n" + fmt.Sprint(r, " ", string(j))
} else {
content = fmt.Sprint(r, " ", string(j))
}
}
}
marshalAny := func(f any) {
j, err := json.Marshal(f)
if err == nil {
if contentExists {
content += "\n" + string(j)
} else {
content = string(j)
}
}
}
// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
if content == "" {
if r != "" {
if contentExists {
content = fmt.Sprint(r, i.StringContent)
}
if i.FunctionCall != nil {
|
||||
marshalAnyRole(i.FunctionCall)
|
||||
}
|
||||
if i.ToolCalls != nil {
|
||||
marshalAnyRole(i.ToolCalls)
|
||||
}
|
||||
} else {
|
||||
if contentExists {
|
||||
content = fmt.Sprint(i.StringContent)
|
||||
}
|
||||
if i.FunctionCall != nil {
|
||||
marshalAny(i.FunctionCall)
|
||||
}
|
||||
if i.ToolCalls != nil {
|
||||
marshalAny(i.ToolCalls)
|
||||
}
|
||||
}
|
||||
// Special Handling: System. We care if it was printed at all, not the r branch, so check seperately
|
||||
if contentExists && role == "system" {
|
||||
suppressConfigSystemPrompt = true
|
||||
}
|
||||
}
|
||||
|
||||
mess = append(mess, content)
|
||||
}
|
||||
|
||||
joinCharacter := "\n"
|
||||
if config.TemplateConfig.JoinChatMessagesByCharacter != nil {
|
||||
joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter
|
||||
}
|
||||
|
||||
predInput = strings.Join(mess, joinCharacter)
|
||||
log.Debug().Msgf("Prompt (before templating): %s", predInput)
|
||||
|
||||
templateFile := ""
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
|
||||
templateFile = config.Model
|
||||
}
|
||||
|
||||
if config.TemplateConfig.Chat != "" && !shouldUseFn {
|
||||
templateFile = config.TemplateConfig.Chat
|
||||
}
|
||||
|
||||
if config.TemplateConfig.Functions != "" && shouldUseFn {
|
||||
templateFile = config.TemplateConfig.Functions
|
||||
}
|
||||
|
||||
if templateFile != "" {
|
||||
templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
SuppressSystemPrompt: suppressConfigSystemPrompt,
|
||||
Input: predInput,
|
||||
Functions: funcs,
|
||||
})
|
||||
if err == nil {
|
||||
predInput = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
} else {
|
||||
log.Debug().Msgf("Template failed loading: %s", err.Error())
|
||||
}
|
||||
}
|
||||
predInput = evaluator.TemplateMessages(input.Messages, config, funcs, shouldUseFn)
|
||||
|
||||
log.Debug().Msgf("Prompt (after templating): %s", predInput)
|
||||
if shouldUseFn && config.Grammar != "" {
|
||||
if config.Grammar != "" {
|
||||
log.Debug().Msgf("Grammar: %+v", config.Grammar)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/templates"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/valyala/fasthttp"
|
||||
)
|
||||
@@ -25,7 +26,7 @@ import (
|
||||
// @Param request body schema.OpenAIRequest true "query params"
|
||||
// @Success 200 {object} schema.OpenAIResponse "Response"
|
||||
// @Router /v1/completions [post]
|
||||
func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
|
||||
@@ -94,17 +95,6 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
||||
c.Set("Transfer-Encoding", "chunked")
|
||||
}
|
||||
|
||||
templateFile := ""
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
|
||||
templateFile = config.Model
|
||||
}
|
||||
|
||||
if config.TemplateConfig.Completion != "" {
|
||||
templateFile = config.TemplateConfig.Completion
|
||||
}
|
||||
|
||||
if input.Stream {
|
||||
if len(config.PromptStrings) > 1 {
|
||||
return errors.New("cannot handle more than 1 `PromptStrings` when Streaming")
|
||||
@@ -112,15 +102,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
||||
|
||||
predInput := config.PromptStrings[0]
|
||||
|
||||
if templateFile != "" {
|
||||
templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
Input: predInput,
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
})
|
||||
if err == nil {
|
||||
predInput = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
}
|
||||
templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{
|
||||
Input: predInput,
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
})
|
||||
if err == nil {
|
||||
predInput = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
}
|
||||
|
||||
responses := make(chan schema.OpenAIResponse)
|
||||
@@ -165,16 +153,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
||||
totalTokenUsage := backend.TokenUsage{}
|
||||
|
||||
for k, i := range config.PromptStrings {
|
||||
if templateFile != "" {
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
Input: i,
|
||||
})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
Input: i,
|
||||
})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
|
||||
r, tokenUsage, err := ComputeChoices(
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/templates"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
@@ -21,7 +22,8 @@ import (
|
||||
// @Param request body schema.OpenAIRequest true "query params"
|
||||
// @Success 200 {object} schema.OpenAIResponse "Response"
|
||||
// @Router /v1/edits [post]
|
||||
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||
if err != nil {
|
||||
@@ -35,31 +37,18 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConf
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
templateFile := ""
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
|
||||
templateFile = config.Model
|
||||
}
|
||||
|
||||
if config.TemplateConfig.Edit != "" {
|
||||
templateFile = config.TemplateConfig.Edit
|
||||
}
|
||||
|
||||
var result []schema.Choice
|
||||
totalTokenUsage := backend.TokenUsage{}
|
||||
|
||||
for _, i := range config.InputStrings {
|
||||
if templateFile != "" {
|
||||
templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
Input: i,
|
||||
Instruction: input.Instruction,
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.EditPromptTemplate, *config, templates.PromptTemplateData{
|
||||
Input: i,
|
||||
Instruction: input.Instruction,
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
|
||||
r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
"github.com/gofiber/fiber/v2/middleware/favicon"
"github.com/gofiber/fiber/v2/middleware/filesystem"
"github.com/mudler/LocalAI/core/explorer"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/http/routes"
)

@@ -22,6 +23,7 @@ func Explorer(db *explorer.Database) *fiber.App {

app := fiber.New(fiberCfg)

app.Use(middleware.StripPathPrefix())
routes.RegisterExplorerRoutes(app, db)

httpFS := http.FS(embedDirStatic)
@@ -8,6 +8,7 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/keyauth"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/utils"
)

// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
@@ -39,7 +40,9 @@ func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.Er
if applicationConfig.OpaqueErrors {
return ctx.SendStatus(401)
}
return ctx.Status(401).Render("views/login", nil)
return ctx.Status(401).Render("views/login", fiber.Map{
"BaseURL": utils.BaseURL(ctx),
})
}
if applicationConfig.OpaqueErrors {
return ctx.SendStatus(500)
core/http/middleware/strippathprefix.go (new file, 36 lines)
@@ -0,0 +1,36 @@
package middleware

import (
"strings"

"github.com/gofiber/fiber/v2"
)

// StripPathPrefix returns a middleware that strips a path prefix from the request path.
// The path prefix is obtained from the X-Forwarded-Prefix HTTP request header.
func StripPathPrefix() fiber.Handler {
return func(c *fiber.Ctx) error {
for _, prefix := range c.GetReqHeaders()["X-Forwarded-Prefix"] {
if prefix != "" {
path := c.Path()
pos := len(prefix)

if prefix[pos-1] == '/' {
pos--
} else {
prefix += "/"
}

if strings.HasPrefix(path, prefix) {
c.Path(path[pos:])
break
} else if prefix[:pos] == path {
c.Redirect(prefix)
return nil
}
}
}

return c.Next()
}
}
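The middleware above only has to be registered once on the Fiber app; the prefix itself always arrives via the X-Forwarded-Prefix header set by a reverse proxy. Below is a minimal, hypothetical sketch (not part of this changeset; route and port are invented for illustration) of wiring it into a standalone program:

package main

import (
	"log"

	"github.com/gofiber/fiber/v2"
	"github.com/mudler/LocalAI/core/http/middleware"
)

func main() {
	app := fiber.New()
	// Rewrite the request path before routing, based on X-Forwarded-Prefix.
	app.Use(middleware.StripPathPrefix())

	app.Get("/v1/models", func(c *fiber.Ctx) error {
		// A request to /localai/v1/models with "X-Forwarded-Prefix: /localai/"
		// reaches this handler with the prefix already stripped.
		return c.JSON(fiber.Map{"path": c.Path()})
	})

	log.Fatal(app.Listen(":8080"))
}

The test file that follows exercises the same behaviour, including the redirect issued when the prefix is requested without a trailing slash.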
core/http/middleware/strippathprefix_test.go (new file, 121 lines)
@@ -0,0 +1,121 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestStripPathPrefix(t *testing.T) {
|
||||
var actualPath string
|
||||
|
||||
app := fiber.New()
|
||||
|
||||
app.Use(StripPathPrefix())
|
||||
|
||||
app.Get("/hello/world", func(c *fiber.Ctx) error {
|
||||
actualPath = c.Path()
|
||||
return nil
|
||||
})
|
||||
|
||||
app.Get("/", func(c *fiber.Ctx) error {
|
||||
actualPath = c.Path()
|
||||
return nil
|
||||
})
|
||||
|
||||
for _, tc := range []struct {
|
||||
name string
|
||||
path string
|
||||
prefixHeader []string
|
||||
expectStatus int
|
||||
expectPath string
|
||||
}{
|
||||
{
|
||||
name: "without prefix and header",
|
||||
path: "/hello/world",
|
||||
expectStatus: 200,
|
||||
expectPath: "/hello/world",
|
||||
},
|
||||
{
|
||||
name: "without prefix and headers on root path",
|
||||
path: "/",
|
||||
expectStatus: 200,
|
||||
expectPath: "/",
|
||||
},
|
||||
{
|
||||
name: "without prefix but header",
|
||||
path: "/hello/world",
|
||||
prefixHeader: []string{"/otherprefix/"},
|
||||
expectStatus: 200,
|
||||
expectPath: "/hello/world",
|
||||
},
|
||||
{
|
||||
name: "with prefix but non-matching header",
|
||||
path: "/prefix/hello/world",
|
||||
prefixHeader: []string{"/otherprefix/"},
|
||||
expectStatus: 404,
|
||||
},
|
||||
{
|
||||
name: "with prefix and matching header",
|
||||
path: "/myprefix/hello/world",
|
||||
prefixHeader: []string{"/myprefix/"},
|
||||
expectStatus: 200,
|
||||
expectPath: "/hello/world",
|
||||
},
|
||||
{
|
||||
name: "with prefix and 1st header matching",
|
||||
path: "/myprefix/hello/world",
|
||||
prefixHeader: []string{"/myprefix/", "/otherprefix/"},
|
||||
expectStatus: 200,
|
||||
expectPath: "/hello/world",
|
||||
},
|
||||
{
|
||||
name: "with prefix and 2nd header matching",
|
||||
path: "/myprefix/hello/world",
|
||||
prefixHeader: []string{"/otherprefix/", "/myprefix/"},
|
||||
expectStatus: 200,
|
||||
expectPath: "/hello/world",
|
||||
},
|
||||
{
|
||||
name: "with prefix and header not ending with slash",
|
||||
path: "/myprefix/hello/world",
|
||||
prefixHeader: []string{"/myprefix"},
|
||||
expectStatus: 200,
|
||||
expectPath: "/hello/world",
|
||||
},
|
||||
{
|
||||
name: "with prefix and non-matching header not ending with slash",
|
||||
path: "/myprefix-suffix/hello/world",
|
||||
prefixHeader: []string{"/myprefix"},
|
||||
expectStatus: 404,
|
||||
},
|
||||
{
|
||||
name: "redirect when prefix does not end with a slash",
|
||||
path: "/myprefix",
|
||||
prefixHeader: []string{"/myprefix"},
|
||||
expectStatus: 302,
|
||||
expectPath: "/myprefix/",
|
||||
},
|
||||
} {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
actualPath = ""
|
||||
req := httptest.NewRequest("GET", tc.path, nil)
|
||||
if tc.prefixHeader != nil {
|
||||
req.Header["X-Forwarded-Prefix"] = tc.prefixHeader
|
||||
}
|
||||
|
||||
resp, err := app.Test(req, -1)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, tc.expectStatus, resp.StatusCode, "response status code")
|
||||
|
||||
if tc.expectStatus == 200 {
|
||||
require.Equal(t, tc.expectPath, actualPath, "rewritten path")
|
||||
} else if tc.expectStatus == 302 {
|
||||
require.Equal(t, tc.expectPath, resp.Header.Get("Location"), "redirect location")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -10,6 +10,7 @@ import (
"github.com/gofiber/fiber/v2"
fiberhtml "github.com/gofiber/template/html/v2"
"github.com/microcosm-cc/bluemonday"
"github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/core/schema"
"github.com/russross/blackfriday"
)
@@ -26,7 +27,9 @@ func notFoundHandler(c *fiber.Ctx) error {
})
} else {
// The client expects an HTML response
return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{})
return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{
"BaseURL": utils.BaseURL(c),
})
}
}
@@ -11,62 +11,62 @@ import (
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func RegisterLocalAIRoutes(app *fiber.App,
|
||||
func RegisterLocalAIRoutes(router *fiber.App,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
galleryService *services.GalleryService) {
|
||||
|
||||
app.Get("/swagger/*", swagger.HandlerDefault) // default
|
||||
router.Get("/swagger/*", swagger.HandlerDefault) // default
|
||||
|
||||
// LocalAI API endpoints
|
||||
if !appConfig.DisableGalleryEndpoint {
|
||||
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
|
||||
app.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
|
||||
app.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())
|
||||
router.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
|
||||
router.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())
|
||||
|
||||
app.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
|
||||
app.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
|
||||
app.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint())
|
||||
app.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint())
|
||||
app.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
|
||||
app.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
|
||||
router.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
|
||||
router.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
|
||||
router.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint())
|
||||
router.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint())
|
||||
router.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
|
||||
router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
|
||||
}
|
||||
|
||||
app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
|
||||
app.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))
|
||||
router.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
|
||||
router.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))
|
||||
|
||||
// Stores
|
||||
sl := model.NewModelLoader("")
|
||||
app.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
|
||||
app.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
|
||||
app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
|
||||
app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
|
||||
router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
|
||||
router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
|
||||
router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
|
||||
router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
|
||||
|
||||
if !appConfig.DisableMetrics {
|
||||
app.Get("/metrics", localai.LocalAIMetricsEndpoint())
|
||||
router.Get("/metrics", localai.LocalAIMetricsEndpoint())
|
||||
}
|
||||
|
||||
// Experimental Backend Statistics Module
|
||||
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
|
||||
app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
|
||||
app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
|
||||
router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
|
||||
router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
|
||||
|
||||
// p2p
|
||||
if p2p.IsP2PEnabled() {
|
||||
app.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
|
||||
app.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
|
||||
router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
|
||||
router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
|
||||
}
|
||||
|
||||
app.Get("/version", func(c *fiber.Ctx) error {
|
||||
router.Get("/version", func(c *fiber.Ctx) error {
|
||||
return c.JSON(struct {
|
||||
Version string `json:"version"`
|
||||
}{Version: internal.PrintableVersion()})
|
||||
})
|
||||
|
||||
app.Get("/system", localai.SystemInformations(ml, appConfig))
|
||||
router.Get("/system", localai.SystemInformations(ml, appConfig))
|
||||
|
||||
// misc
|
||||
app.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
|
||||
router.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
|
||||
|
||||
}
|
||||
|
||||
@@ -2,84 +2,134 @@ package routes
|
||||
|
||||
import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/application"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/openai"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func RegisterOpenAIRoutes(app *fiber.App,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig) {
|
||||
application *application.Application) {
|
||||
// openAI compatible API endpoint
|
||||
|
||||
// chat
|
||||
app.Post("/v1/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
|
||||
app.Post("/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/chat/completions",
|
||||
openai.ChatEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
app.Post("/chat/completions",
|
||||
openai.ChatEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
// edit
|
||||
app.Post("/v1/edits", openai.EditEndpoint(cl, ml, appConfig))
|
||||
app.Post("/edits", openai.EditEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/edits",
|
||||
openai.EditEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
app.Post("/edits",
|
||||
openai.EditEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
// assistant
|
||||
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
||||
app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
||||
app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Get("/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/v1/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
|
||||
// files
|
||||
app.Post("/v1/files", openai.UploadFilesEndpoint(cl, appConfig))
|
||||
app.Post("/files", openai.UploadFilesEndpoint(cl, appConfig))
|
||||
app.Get("/v1/files", openai.ListFilesEndpoint(cl, appConfig))
|
||||
app.Get("/files", openai.ListFilesEndpoint(cl, appConfig))
|
||||
app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
|
||||
app.Get("/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
|
||||
app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
|
||||
app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
|
||||
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
|
||||
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
|
||||
app.Post("/v1/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
app.Post("/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
app.Get("/v1/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
app.Get("/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
app.Get("/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
|
||||
// completion
|
||||
app.Post("/v1/completions", openai.CompletionEndpoint(cl, ml, appConfig))
|
||||
app.Post("/completions", openai.CompletionEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/engines/:model/completions", openai.CompletionEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/completions",
|
||||
openai.CompletionEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
app.Post("/completions",
|
||||
openai.CompletionEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
app.Post("/v1/engines/:model/completions",
|
||||
openai.CompletionEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
// embeddings
|
||||
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||
app.Post("/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
|
||||
// audio
|
||||
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/audio/speech", localai.TTSEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/v1/audio/speech", localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
|
||||
// images
|
||||
app.Post("/v1/images/generations", openai.ImageEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/images/generations", openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
|
||||
if appConfig.ImageDir != "" {
|
||||
app.Static("/generated-images", appConfig.ImageDir)
|
||||
if application.ApplicationConfig().ImageDir != "" {
|
||||
app.Static("/generated-images", application.ApplicationConfig().ImageDir)
|
||||
}
|
||||
|
||||
if appConfig.AudioDir != "" {
|
||||
app.Static("/generated-audio", appConfig.AudioDir)
|
||||
if application.ApplicationConfig().AudioDir != "" {
|
||||
app.Static("/generated-audio", application.ApplicationConfig().AudioDir)
|
||||
}
|
||||
|
||||
// List models
|
||||
app.Get("/v1/models", openai.ListModelsEndpoint(cl, ml))
|
||||
app.Get("/models", openai.ListModelsEndpoint(cl, ml))
|
||||
app.Get("/v1/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader()))
|
||||
app.Get("/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader()))
|
||||
}
|
||||
|
||||
@@ -6,20 +6,21 @@ import (
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/microcosm-cc/bluemonday"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/core/http/elements"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/mudler/LocalAI/core/http/utils"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/internal"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/xsync"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/microcosm-cc/bluemonday"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type modelOpCache struct {
|
||||
@@ -91,6 +92,7 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
app.Get("/p2p", func(c *fiber.Ctx) error {
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - P2P dashboard",
|
||||
"BaseURL": utils.BaseURL(c),
|
||||
"Version": internal.PrintableVersion(),
|
||||
//"Nodes": p2p.GetAvailableNodes(""),
|
||||
//"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
|
||||
@@ -149,6 +151,7 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Models",
|
||||
"BaseURL": utils.BaseURL(c),
|
||||
"Version": internal.PrintableVersion(),
|
||||
"Models": template.HTML(elements.ListModels(models, processingModels, galleryService)),
|
||||
"Repositories": appConfig.Galleries,
|
||||
@@ -308,6 +311,7 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Chat with " + c.Params("model"),
|
||||
"BaseURL": utils.BaseURL(c),
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": c.Params("model"),
|
||||
"Version": internal.PrintableVersion(),
|
||||
@@ -323,11 +327,12 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
|
||||
if len(backendConfigs) == 0 {
|
||||
// If no model is available redirect to the index which suggests how to install models
|
||||
return c.Redirect("/")
|
||||
return c.Redirect(utils.BaseURL(c))
|
||||
}
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Talk",
|
||||
"BaseURL": utils.BaseURL(c),
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": backendConfigs[0],
|
||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||
@@ -344,11 +349,12 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
|
||||
if len(backendConfigs) == 0 {
|
||||
// If no model is available redirect to the index which suggests how to install models
|
||||
return c.Redirect("/")
|
||||
return c.Redirect(utils.BaseURL(c))
|
||||
}
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Chat with " + backendConfigs[0],
|
||||
"BaseURL": utils.BaseURL(c),
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": backendConfigs[0],
|
||||
"Version": internal.PrintableVersion(),
|
||||
@@ -364,6 +370,7 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Generate images with " + c.Params("model"),
|
||||
"BaseURL": utils.BaseURL(c),
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": c.Params("model"),
|
||||
"Version": internal.PrintableVersion(),
|
||||
@@ -380,11 +387,12 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
|
||||
if len(backendConfigs) == 0 {
|
||||
// If no model is available redirect to the index which suggests how to install models
|
||||
return c.Redirect("/")
|
||||
return c.Redirect(utils.BaseURL(c))
|
||||
}
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Generate images with " + backendConfigs[0].Name,
|
||||
"BaseURL": utils.BaseURL(c),
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": backendConfigs[0].Name,
|
||||
"Version": internal.PrintableVersion(),
|
||||
@@ -400,6 +408,7 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Generate images with " + c.Params("model"),
|
||||
"BaseURL": utils.BaseURL(c),
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": c.Params("model"),
|
||||
"Version": internal.PrintableVersion(),
|
||||
@@ -416,11 +425,12 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
|
||||
if len(backendConfigs) == 0 {
|
||||
// If no model is available redirect to the index which suggests how to install models
|
||||
return c.Redirect("/")
|
||||
return c.Redirect(utils.BaseURL(c))
|
||||
}
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Generate audio with " + backendConfigs[0].Name,
|
||||
"BaseURL": utils.BaseURL(c),
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": backendConfigs[0].Name,
|
||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||
|
||||
@@ -7,33 +7,33 @@ https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wg
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
font-display: swap;
|
||||
src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuLyfMZg.ttf) format('truetype');
|
||||
src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuLyfMZg.ttf) format('truetype');
|
||||
}
|
||||
@font-face {
|
||||
font-family: 'Inter';
|
||||
font-style: normal;
|
||||
font-weight: 600;
|
||||
font-display: swap;
|
||||
src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuGKYMZg.ttf) format('truetype');
|
||||
src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuGKYMZg.ttf) format('truetype');
|
||||
}
|
||||
@font-face {
|
||||
font-family: 'Inter';
|
||||
font-style: normal;
|
||||
font-weight: 700;
|
||||
font-display: swap;
|
||||
src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuFuYMZg.ttf) format('truetype');
|
||||
src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuFuYMZg.ttf) format('truetype');
|
||||
}
|
||||
@font-face {
|
||||
font-family: 'Roboto';
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
font-display: swap;
|
||||
src: url(/static/assets/KFOmCnqEu92Fr1Me5Q.ttf) format('truetype');
|
||||
src: url(./KFOmCnqEu92Fr1Me5Q.ttf) format('truetype');
|
||||
}
|
||||
@font-face {
|
||||
font-family: 'Roboto';
|
||||
font-style: normal;
|
||||
font-weight: 500;
|
||||
font-display: swap;
|
||||
src: url(/static/assets/KFOlCnqEu92Fr1MmEU9vAw.ttf) format('truetype');
|
||||
src: url(./KFOlCnqEu92Fr1MmEU9vAw.ttf) format('truetype');
|
||||
}
|
||||
|
||||
@@ -7,33 +7,33 @@ https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap
|
||||
font-style: normal;
|
||||
font-weight: 300;
|
||||
font-display: swap;
|
||||
src: url(/static/assets//KFOlCnqEu92Fr1MmSU5fBBc9.ttf) format('truetype');
|
||||
src: url(./KFOlCnqEu92Fr1MmSU5fBBc9.ttf) format('truetype');
|
||||
}
|
||||
@font-face {
|
||||
font-family: 'Roboto';
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
font-display: swap;
|
||||
src: url(/static/assets//KFOmCnqEu92Fr1Mu4mxP.ttf) format('truetype');
|
||||
src: url(./KFOmCnqEu92Fr1Mu4mxP.ttf) format('truetype');
|
||||
}
|
||||
@font-face {
|
||||
font-family: 'Roboto';
|
||||
font-style: normal;
|
||||
font-weight: 500;
|
||||
font-display: swap;
|
||||
src: url(/static/assets//KFOlCnqEu92Fr1MmEU9fBBc9.ttf) format('truetype');
|
||||
src: url(./KFOlCnqEu92Fr1MmEU9fBBc9.ttf) format('truetype');
|
||||
}
|
||||
@font-face {
|
||||
font-family: 'Roboto';
|
||||
font-style: normal;
|
||||
font-weight: 700;
|
||||
font-display: swap;
|
||||
src: url(/static/assets//KFOlCnqEu92Fr1MmWUlfBBc9.ttf) format('truetype');
|
||||
src: url(./KFOlCnqEu92Fr1MmWUlfBBc9.ttf) format('truetype');
|
||||
}
|
||||
@font-face {
|
||||
font-family: 'Roboto';
|
||||
font-style: normal;
|
||||
font-weight: 900;
|
||||
font-display: swap;
|
||||
src: url(/static/assets//KFOlCnqEu92Fr1MmYUtfBBc9.ttf) format('truetype');
|
||||
src: url(./KFOlCnqEu92Fr1MmYUtfBBc9.ttf) format('truetype');
|
||||
}
|
||||
|
||||
@@ -143,7 +143,7 @@ function readInputImage() {
// }

// Source: https://stackoverflow.com/a/75751803/11386095
const response = await fetch("/v1/chat/completions", {
const response = await fetch("v1/chat/completions", {
method: "POST",
headers: {
Authorization: `Bearer ${key}`,

@@ -48,7 +48,7 @@ async function promptDallE(key, input) {
document.getElementById("input").disabled = true;

const model = document.getElementById("image-model").value;
const response = await fetch("/v1/images/generations", {
const response = await fetch("v1/images/generations", {
method: "POST",
headers: {
Authorization: `Bearer ${key}`,

@@ -122,7 +122,7 @@ async function sendAudioToWhisper(audioBlob) {
formData.append('model', getWhisperModel());
API_KEY = localStorage.getItem("key");

const response = await fetch('/v1/audio/transcriptions', {
const response = await fetch('v1/audio/transcriptions', {
method: 'POST',
headers: {
'Authorization': `Bearer ${API_KEY}`
@@ -139,7 +139,7 @@ async function sendTextToChatGPT(text) {
conversationHistory.push({ role: "user", content: text });
API_KEY = localStorage.getItem("key");

const response = await fetch('/v1/chat/completions', {
const response = await fetch('v1/chat/completions', {
method: 'POST',
headers: {
'Authorization': `Bearer ${API_KEY}`,
@@ -163,7 +163,7 @@ async function sendTextToChatGPT(text) {
async function getTextToSpeechAudio(text) {
API_KEY = localStorage.getItem("key");

const response = await fetch('/v1/audio/speech', {
const response = await fetch('v1/audio/speech', {

method: 'POST',
headers: {

@@ -19,7 +19,7 @@ async function tts(key, input) {
document.getElementById("input").disabled = true;

const model = document.getElementById("tts-model").value;
const response = await fetch("/tts", {
const response = await fetch("tts", {
method: "POST",
headers: {
Authorization: `Bearer ${key}`,
core/http/utils/baseurl.go (new file, 24 lines)
@@ -0,0 +1,24 @@
package utils

import (
"strings"

"github.com/gofiber/fiber/v2"
)

// BaseURL returns the base URL for the given HTTP request context.
// It takes into account that the app may be exposed by a reverse-proxy under a different protocol, host and path.
// The returned URL is guaranteed to end with `/`.
// The method should be used in conjunction with the StripPathPrefix middleware.
func BaseURL(c *fiber.Ctx) string {
path := c.Path()
origPath := c.OriginalURL()

if path != origPath && strings.HasSuffix(origPath, path) {
pathPrefix := origPath[:len(origPath)-len(path)+1]

return c.BaseURL() + pathPrefix
}

return c.BaseURL() + "/"
}
core/http/utils/baseurl_test.go (new file, 48 lines)
@@ -0,0 +1,48 @@
package utils

import (
"net/http/httptest"
"testing"

"github.com/gofiber/fiber/v2"
"github.com/stretchr/testify/require"
)

func TestBaseURL(t *testing.T) {
for _, tc := range []struct {
name string
prefix string
expectURL string
}{
{
name: "without prefix",
prefix: "/",
expectURL: "http://example.com/",
},
{
name: "with prefix",
prefix: "/myprefix/",
expectURL: "http://example.com/myprefix/",
},
} {
t.Run(tc.name, func(t *testing.T) {
app := fiber.New()
actualURL := ""

app.Get(tc.prefix+"hello/world", func(c *fiber.Ctx) error {
if tc.prefix != "/" {
c.Path("/hello/world")
}
actualURL = BaseURL(c)
return nil
})

req := httptest.NewRequest("GET", tc.prefix+"hello/world", nil)
resp, err := app.Test(req, -1)

require.NoError(t, err)
require.Equal(t, 200, resp.StatusCode, "response status code")
require.Equal(t, tc.expectURL, actualURL, "base URL")
})
}
}
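Taken together, StripPathPrefix and BaseURL let handlers emit links that stay correct when the server is mounted under an arbitrary prefix behind a reverse proxy. The sketch below is an illustrative standalone program under assumed conditions (invented route, port, and link target), not code from this changeset:

package main

import (
	"log"

	"github.com/gofiber/fiber/v2"
	"github.com/mudler/LocalAI/core/http/middleware"
	"github.com/mudler/LocalAI/core/http/utils"
)

func main() {
	app := fiber.New()
	app.Use(middleware.StripPathPrefix())

	app.Get("/", func(c *fiber.Ctx) error {
		// BaseURL reflects the proxy prefix (if any) and always ends with "/",
		// so relative targets like "browse" or "v1/models" can be appended directly.
		base := utils.BaseURL(c)
		return c.SendString(`<a href="` + base + `browse">Model gallery</a>`)
	})

	log.Fatal(app.Listen(":8080"))
}

This is the same pattern the templates in this diff rely on: they switch to relative hrefs (e.g. "browse", "chat/{{.Model}}") and receive "BaseURL" in their view data.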
@@ -12,7 +12,7 @@
|
||||
<div class="header text-center py-12">
|
||||
<h1 class="text-5xl font-bold">Welcome to your LocalAI instance!</h1>
|
||||
<div class="mt-6">
|
||||
<!-- <a href="/" aria-label="HomePage" alt="HomePage">
|
||||
<!-- <a href="./" aria-label="HomePage" alt="HomePage">
|
||||
<img class="mx-auto w-1/4 h-auto" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo">
|
||||
</a>
|
||||
-->
|
||||
|
||||
@@ -28,7 +28,7 @@ SOFTWARE.
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
{{template "views/partials/head" .}}
|
||||
<script defer src="/static/chat.js"></script>
|
||||
<script defer src="static/chat.js"></script>
|
||||
<style>
|
||||
body {
|
||||
overflow: hidden;
|
||||
@@ -101,9 +101,9 @@ SOFTWARE.
|
||||
{{ $model:=.Model}}
|
||||
{{ range .ModelsConfig }}
|
||||
{{ if eq . $model }}
|
||||
<option value="/chat/{{.}}" selected class="bg-gray-700 text-white">{{.}}</option>
|
||||
<option value="chat/{{.}}" selected class="bg-gray-700 text-white">{{.}}</option>
|
||||
{{ else }}
|
||||
<option value="/chat/{{.}}" class="bg-gray-700 text-white">{{.}}</option>
|
||||
<option value="chat/{{.}}" class="bg-gray-700 text-white">{{.}}</option>
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
</select>
|
||||
@@ -142,7 +142,7 @@ SOFTWARE.
|
||||
<div id="loader" class="my-2 loader" style="display: none;"></div>
|
||||
<input id="chat-model" type="hidden" value="{{.Model}}">
|
||||
<input id="input_image" type="file" style="display: none;" @change="fileName = $event.target.files[0].name">
|
||||
<form id="prompt" action="/chat/{{.Model}}" method="get" @submit.prevent="submitPrompt">
|
||||
<form id="prompt" action="chat/{{.Model}}" method="get" @submit.prevent="submitPrompt">
|
||||
<div class="relative w-full">
|
||||
<textarea
|
||||
id="input"
|
||||
|
||||
@@ -370,7 +370,7 @@
|
||||
}
|
||||
}
|
||||
</script>
|
||||
<script src="/static/p2panimation.js"></script>
|
||||
<script src="static/p2panimation.js"></script>
|
||||
|
||||
{{template "views/partials/footer" .}}
|
||||
</div>
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
{{template "views/partials/inprogress" .}}
|
||||
{{ if eq (len .ModelsConfig) 0 }}
|
||||
<h2 class="text-center text-3xl font-semibold text-gray-100"> <i class="text-yellow-200 ml-2 fa-solid fa-triangle-exclamation animate-pulse"></i> Ouch! seems you don't have any models installed from the LocalAI gallery!</h2>
|
||||
<p class="text-center mt-4 text-xl">..install something from the <a class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded" href="/browse">🖼️ Gallery</a> or check the <a href="https://localai.io/basics/getting_started/" class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded"> <i class="fa-solid fa-book"></i> Getting started documentation </a></p>
|
||||
<p class="text-center mt-4 text-xl">..install something from the <a class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded" href="browse">🖼️ Gallery</a> or check the <a href="https://localai.io/basics/getting_started/" class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded"> <i class="fa-solid fa-book"></i> Getting started documentation </a></p>
|
||||
|
||||
{{ if ne (len .Models) 0 }}
|
||||
<hr class="my-4">
|
||||
@@ -66,7 +66,7 @@
|
||||
{{ end }}
|
||||
</td>
|
||||
<td class="px-4 py-3 font-bold">
|
||||
<p class="font-bold text-white flex items-center"><i class="fas fa-brain pr-2"></i><a href="/browse?term={{.Name}}">{{.Name}}</a></p>
|
||||
<p class="font-bold text-white flex items-center"><i class="fas fa-brain pr-2"></i><a href="browse?term={{.Name}}">{{.Name}}</a></p>
|
||||
</td>
|
||||
<td class="px-4 py-3 font-bold">
|
||||
{{ if .Backend }}
|
||||
@@ -84,7 +84,7 @@
|
||||
<td class="px-4 py-3">
|
||||
<button
|
||||
class="float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
|
||||
data-twe-ripple-color="light" data-twe-ripple-init="" hx-confirm="Are you sure you wish to delete the model?" hx-post="/browse/delete/model/{{.Name}}" hx-swap="outerHTML"><i class="fa-solid fa-cancel pr-2"></i>Delete</button>
|
||||
data-twe-ripple-color="light" data-twe-ripple-init="" hx-confirm="Are you sure you wish to delete the model?" hx-post="browse/delete/model/{{.Name}}" hx-swap="outerHTML"><i class="fa-solid fa-cancel pr-2"></i>Delete</button>
|
||||
</td>
|
||||
{{ end }}
|
||||
{{ range .Models }}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.