Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 03:02:38 -05:00)

Compare commits: revert-205...v2.14.0 (123 commits)
| SHA1 |
|---|
| b58274b8a2 |
| a31d00d904 |
| 2cc1bd85af |
| 2c5a46bc34 |
| f7f8b4804b |
| e5bd9a76c7 |
| 4690b534e0 |
| 6a7a7996bb |
| 962ebbaf77 |
| f90d56d371 |
| 445cfd4db3 |
| b24d44dc56 |
| cd31f8d865 |
| 970cb3a219 |
| f7aabf1b50 |
| e38610e521 |
| 3754f154ee |
| 29d7812344 |
| 5fd46175dc |
| 52a268c38c |
| 53c3842bc2 |
| c4f958e11b |
| 147440b39b |
| baff5ff8c2 |
| ea13863221 |
| 93ca56086e |
| 11c48a0004 |
| b7ea9602f5 |
| 982dc6a2bd |
| 74d903acca |
| 5fef3b0ff1 |
| 0674893649 |
| e8d44447ad |
| a24cd4fda0 |
| 01860674c4 |
| 987b7ad42d |
| 21974fe1d3 |
| 26e1892521 |
| a78cd67737 |
| 5e243ceaeb |
| 1a0a6f60a7 |
| 3179c019af |
| a8089494fd |
| a248ede222 |
| 0f0ae13ad0 |
| 773d5d23d5 |
| c3982212f9 |
| 7e6bf6e7a1 |
| 9fc0135991 |
| 164be58445 |
| 1f8461767d |
| 935f4c23f6 |
| 4c97406f2b |
| fb2a05ff43 |
| 030d555995 |
| 56d843c263 |
| 2dc1fa2474 |
| c9451cb604 |
| 006306b183 |
| 2cd4936c99 |
| 44bc540bb5 |
| 6b411ae212 |
| eed285f9de |
| c8dd8e5ef4 |
| 365ef92530 |
| 5fceb876c4 |
| d98063e80e |
| 45761f8be2 |
| 4ae4e44506 |
| 2ada13b1ad |
| 5d170e9264 |
| 1b0a64aa46 |
| aa8e1c63d5 |
| 60690c9fc4 |
| 758b0c9042 |
| 48d0aa2f6d |
| b664edde29 |
| e16658b7ec |
| d30280ed23 |
| 9dbd217c59 |
| 23eac98b3c |
| 4fffc47e77 |
| d65214a234 |
| 2fb34b00b5 |
| f718a391c0 |
| ac56ac2b2d |
| 34c3f563fd |
| d2bea6f9e3 |
| a09fe1b9ba |
| 55778b35ff |
| 8b169f1dac |
| d344daf129 |
| 3411e072ca |
| 8e36fe9b6f |
| 0d8bf91699 |
| bd507678be |
| b6f0e80d54 |
| 729378ca98 |
| 220958a87c |
| f3f6535aad |
| 228bc4903f |
| 38c9abed8b |
| 66b002458d |
| 39814cab32 |
| 180cd4ccda |
| 284ad026b1 |
| afa1bca1e3 |
| 03adc1f60d |
| b319ed58b0 |
| 8d30b39811 |
| 1038f7469c |
| b9e7708643 |
| 1e37101930 |
| b2772509b4 |
| 27ec84827c |
| 852316c5a6 |
| e9448005a5 |
| bbea62b907 |
| 13012cfa70 |
| 8f2681f904 |
| f9c75d4878 |
| 502c1eedaa |
| e9f090257c |
.dockerignore

@@ -5,4 +5,7 @@ models
 examples/chatbot-ui/models
 examples/rwkv/models
 examples/**/models
 Dockerfile*
+
+# SonarQube
+.scannerwork
.env (4 changes)

@@ -10,7 +10,7 @@
 #
 ## Define galleries.
 ## models will to install will be visible in `/models/available`
-# LOCALAI_GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
+# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]
 
 ## CORS settings
 # LOCALAI_CORS=true

@@ -86,4 +86,4 @@
 # LOCALAI_WATCHDOG_BUSY=true
 #
 # Time in duration format (e.g. 1h30m) after which a backend is considered busy
-# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
+# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
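The gallery switch can also be exercised without editing `.env`, since the same setting is read from the environment; a minimal sketch (image tag illustrative):

```sh
# Point LocalAI at the new in-tree gallery index at runtime.
docker run -p 8080:8080 \
  -e LOCALAI_GALLERIES='[{"name":"localai","url":"github:mudler/LocalAI/gallery/index.yaml@master"}]' \
  localai/localai:latest
```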
.github/bump_docs.sh (2 changes, vendored)

@@ -2,6 +2,6 @@
 set -xe
 REPO=$1
 
-LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name')
+LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name')
 
 cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json
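The `.name` to `.tag_name` switch guards against releases whose human-readable title differs from (or omits) the git tag: in the GitHub releases API, `name` is the free-form release title while `tag_name` is always the tag itself. A quick way to compare the two fields (repo chosen for illustration):

```sh
# Print both fields from the latest release; only .tag_name is guaranteed
# to be a usable version string.
curl -s "https://api.github.com/repos/mudler/LocalAI/releases/latest" \
  | jq '{name, tag_name}'
```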
.github/labeler.yml (7 changes, vendored)

@@ -8,6 +8,11 @@ kind/documentation:
 - changed-files:
   - any-glob-to-any-file: '*.md'
 
+area/ai-model:
+- any:
+  - changed-files:
+    - any-glob-to-any-file: 'gallery/*'
+
 examples:
 - any:
   - changed-files:

@@ -16,4 +21,4 @@ examples:
 ci:
 - any:
   - changed-files:
-    - any-glob-to-any-file: '.github/*'
+    - any-glob-to-any-file: '.github/*'
.github/workflows/dependabot_auto.yml (2 changes, vendored)

@@ -14,7 +14,7 @@ jobs:
     steps:
       - name: Dependabot metadata
         id: metadata
-        uses: dependabot/fetch-metadata@v2.0.0
+        uses: dependabot/fetch-metadata@v2.1.0
         with:
           github-token: "${{ secrets.GITHUB_TOKEN }}"
           skip-commit-verification: true
.github/workflows/generate_grpc_cache.yaml (94 changes, new file, vendored)

@@ -0,0 +1,94 @@
+name: 'generate and publish GRPC docker caches'
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - master
+
+concurrency:
+  group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  cancel-in-progress: true
+
+jobs:
+  generate_caches:
+    strategy:
+      matrix:
+        include:
+          - grpc-base-image: ubuntu:22.04
+            runs-on: 'ubuntu-latest'
+            platforms: 'linux/amd64'
+    runs-on: ${{matrix.runs-on}}
+    steps:
+      - name: Release space from worker
+        if: matrix.runs-on == 'ubuntu-latest'
+        run: |
+          echo "Listing top largest packages"
+          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+          head -n 30 <<< "${pkgs}"
+          echo
+          df -h
+          echo
+          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
+          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
+          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
+          sudo rm -rf /usr/local/lib/android
+          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
+          sudo rm -rf /usr/share/dotnet
+          sudo apt-get remove -y '^mono-.*' || true
+          sudo apt-get remove -y '^ghc-.*' || true
+          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
+          sudo apt-get remove -y 'php.*' || true
+          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
+          sudo apt-get remove -y '^google-.*' || true
+          sudo apt-get remove -y azure-cli || true
+          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
+          sudo apt-get remove -y '^gfortran-.*' || true
+          sudo apt-get remove -y microsoft-edge-stable || true
+          sudo apt-get remove -y firefox || true
+          sudo apt-get remove -y powershell || true
+          sudo apt-get remove -y r-base-core || true
+          sudo apt-get autoremove -y
+          sudo apt-get clean
+          echo
+          echo "Listing top largest packages"
+          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+          head -n 30 <<< "${pkgs}"
+          echo
+          sudo rm -rfv build || true
+          sudo rm -rf /usr/share/dotnet || true
+          sudo rm -rf /opt/ghc || true
+          sudo rm -rf "/usr/local/share/boost" || true
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
+          df -h
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@master
+        with:
+          platforms: all
+
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@master
+
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Cache GRPC
+        uses: docker/build-push-action@v5
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
+          # This means that even the MAKEFLAGS have to be an EXACT match.
+          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
+          build-args: |
+            GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
+            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
+            GRPC_VERSION=v1.63.0
+          context: .
+          file: ./Dockerfile
+          cache-to: type=gha,ignore-error=true
+          cache-from: type=gha
+          target: grpc
+          platforms: ${{ matrix.platforms }}
+          push: false
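The EXACT-match warning in the Cache GRPC step is the crux of this workflow: `type=gha` cache entries are keyed in part by the build arguments, so a consuming build reuses the cached `grpc` stage only if it passes byte-identical values. A minimal sketch of a matching consumer (flags mirror the workflow; the tag is illustrative, and the gha cache backend only resolves inside a GitHub Actions job):

```sh
# Build the grpc stage with build-args identical to the cache job above;
# any difference (even in GRPC_MAKEFLAGS) forces a full GRPC rebuild.
docker buildx build \
  --build-arg GRPC_BASE_IMAGE=ubuntu:22.04 \
  --build-arg "GRPC_MAKEFLAGS=--jobs=4 --output-sync=target" \
  --build-arg GRPC_VERSION=v1.63.0 \
  --cache-from type=gha \
  --target grpc \
  --file Dockerfile \
  --tag localai-grpc:cache-test .
```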
.github/workflows/image-pr.yml (13 changes, vendored)

@@ -22,6 +22,7 @@ jobs:
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
       base-image: ${{ matrix.base-image }}
+      grpc-base-image: ${{ matrix.grpc-base-image }}
       makeflags: ${{ matrix.makeflags }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}

@@ -61,12 +62,14 @@ jobs:
           ffmpeg: 'false'
           image-type: 'extras'
           base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+          grpc-base-image: "ubuntu:22.04"
           runs-on: 'arc-runner-set'
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+          grpc-base-image: "ubuntu:22.04"
           tag-suffix: 'sycl-f16-ffmpeg'
           ffmpeg: 'true'
           image-type: 'extras'

@@ -85,6 +88,7 @@ jobs:
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
       base-image: ${{ matrix.base-image }}
+      grpc-base-image: ${{ matrix.grpc-base-image }}
       makeflags: ${{ matrix.makeflags }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}

@@ -102,11 +106,12 @@ jobs:
           image-type: 'core'
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=5 --output-sync=target"
+          makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+          grpc-base-image: "ubuntu:22.04"
           tag-suffix: 'sycl-f16-ffmpeg-core'
           ffmpeg: 'true'
           image-type: 'core'

@@ -122,4 +127,4 @@ jobs:
           image-type: 'core'
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=5 --output-sync=target"
+          makeflags: "--jobs=4 --output-sync=target"
.github/workflows/image.yml (34 changes, vendored)

@@ -26,6 +26,7 @@ jobs:
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
       base-image: ${{ matrix.base-image }}
+      grpc-base-image: ${{ matrix.grpc-base-image }}
       aio: ${{ matrix.aio }}
       makeflags: ${{ matrix.makeflags }}
       latest-image: ${{ matrix.latest-image }}

@@ -129,6 +130,7 @@ jobs:
           image-type: 'extras'
           aio: "-aio-gpu-hipblas"
           base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+          grpc-base-image: "ubuntu:22.04"
           latest-image: 'latest-gpu-hipblas'
           latest-image-aio: 'latest-aio-gpu-hipblas'
           runs-on: 'arc-runner-set'

@@ -140,12 +142,14 @@ jobs:
           ffmpeg: 'false'
           image-type: 'extras'
           base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+          grpc-base-image: "ubuntu:22.04"
           runs-on: 'arc-runner-set'
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'auto'
-          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+          grpc-base-image: "ubuntu:22.04"
           tag-suffix: '-sycl-f16-ffmpeg'
           ffmpeg: 'true'
           image-type: 'extras'

@@ -157,7 +161,8 @@ jobs:
         - build-type: 'sycl_f32'
           platforms: 'linux/amd64'
           tag-latest: 'auto'
-          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+          grpc-base-image: "ubuntu:22.04"
           tag-suffix: '-sycl-f32-ffmpeg'
           ffmpeg: 'true'
           image-type: 'extras'

@@ -170,7 +175,8 @@ jobs:
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+          grpc-base-image: "ubuntu:22.04"
           tag-suffix: '-sycl-f16-core'
           ffmpeg: 'false'
           image-type: 'core'

@@ -179,7 +185,8 @@ jobs:
         - build-type: 'sycl_f32'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+          grpc-base-image: "ubuntu:22.04"
           tag-suffix: '-sycl-f32-core'
           ffmpeg: 'false'
           image-type: 'core'

@@ -188,7 +195,8 @@ jobs:
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+          grpc-base-image: "ubuntu:22.04"
           tag-suffix: '-sycl-f16-ffmpeg-core'
           ffmpeg: 'true'
           image-type: 'core'

@@ -197,7 +205,8 @@ jobs:
         - build-type: 'sycl_f32'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+          grpc-base-image: "ubuntu:22.04"
           tag-suffix: '-sycl-f32-ffmpeg-core'
           ffmpeg: 'true'
           image-type: 'core'

@@ -210,6 +219,7 @@ jobs:
           ffmpeg: 'true'
           image-type: 'core'
           base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+          grpc-base-image: "ubuntu:22.04"
           runs-on: 'arc-runner-set'
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'hipblas'

@@ -219,6 +229,7 @@ jobs:
           ffmpeg: 'false'
           image-type: 'core'
           base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
+          grpc-base-image: "ubuntu:22.04"
           runs-on: 'arc-runner-set'
           makeflags: "--jobs=3 --output-sync=target"

@@ -236,6 +247,7 @@ jobs:
       runs-on: ${{ matrix.runs-on }}
       aio: ${{ matrix.aio }}
       base-image: ${{ matrix.base-image }}
+      grpc-base-image: ${{ matrix.grpc-base-image }}
       makeflags: ${{ matrix.makeflags }}
       latest-image: ${{ matrix.latest-image }}
       latest-image-aio: ${{ matrix.latest-image-aio }}

@@ -258,7 +270,7 @@ jobs:
           aio: "-aio-cpu"
           latest-image: 'latest-cpu'
           latest-image-aio: 'latest-aio-cpu'
-          makeflags: "--jobs=5 --output-sync=target"
+          makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "11"
           cuda-minor-version: "7"

@@ -269,7 +281,7 @@ jobs:
           image-type: 'core'
           base-image: "ubuntu:22.04"
           runs-on: 'ubuntu-latest'
-          makeflags: "--jobs=5 --output-sync=target"
+          makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"

@@ -280,7 +292,7 @@ jobs:
           image-type: 'core'
           base-image: "ubuntu:22.04"
           runs-on: 'ubuntu-latest'
-          makeflags: "--jobs=5 --output-sync=target"
+          makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "11"
           cuda-minor-version: "7"

@@ -291,7 +303,7 @@ jobs:
           image-type: 'core'
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=5 --output-sync=target"
+          makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"

@@ -302,4 +314,4 @@ jobs:
           image-type: 'core'
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=5 --output-sync=target"
+          makeflags: "--jobs=4 --output-sync=target"
.github/workflows/image_build.yml (40 changes, vendored)

@@ -6,6 +6,10 @@ on:
     inputs:
       base-image:
         description: 'Base image'
         required: true
         type: string
+      grpc-base-image:
+        description: 'GRPC Base image, must be a compatible image with base-image'
+        required: false
+        default: ''
+        type: string

@@ -57,7 +61,7 @@ on:
       makeflags:
         description: 'Make Flags'
         required: false
-        default: '--jobs=3 --output-sync=target'
+        default: '--jobs=4 --output-sync=target'
         type: string
       aio:
         description: 'AIO Image Name'

@@ -197,29 +201,14 @@ jobs:
           username: ${{ secrets.quayUsername }}
           password: ${{ secrets.quayPassword }}
 
-      - name: Cache GRPC
-        uses: docker/build-push-action@v5
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          build-args: |
-            IMAGE_TYPE=${{ inputs.image-type }}
-            BASE_IMAGE=${{ inputs.base-image }}
-            MAKEFLAGS=${{ inputs.makeflags }}
-            GRPC_VERSION=v1.58.0
-          context: .
-          file: ./Dockerfile
-          cache-from: type=gha
-          cache-to: type=gha,ignore-error=true
-          target: grpc
-          platforms: ${{ inputs.platforms }}
-          push: false
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-
       - name: Build and push
         uses: docker/build-push-action@v5
         with:
           builder: ${{ steps.buildx.outputs.name }}
+          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
+          # This means that even the MAKEFLAGS have to be an EXACT match.
+          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
+          # This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
           build-args: |
             BUILD_TYPE=${{ inputs.build-type }}
             CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}

@@ -227,6 +216,9 @@ jobs:
             FFMPEG=${{ inputs.ffmpeg }}
             IMAGE_TYPE=${{ inputs.image-type }}
             BASE_IMAGE=${{ inputs.base-image }}
+            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
+            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
+            GRPC_VERSION=v1.63.0
             MAKEFLAGS=${{ inputs.makeflags }}
           context: .
           file: ./Dockerfile

@@ -236,14 +228,6 @@ jobs:
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
 
-      - name: Inspect image
-        if: github.event_name != 'pull_request'
-        run: |
-          docker pull localai/localai:${{ steps.meta.outputs.version }}
-          docker image inspect localai/localai:${{ steps.meta.outputs.version }}
-          docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-          docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-
       - name: Build and push AIO image
         if: inputs.aio != ''
         uses: docker/build-push-action@v5
.github/workflows/release.yaml (2 changes, vendored)

@@ -5,7 +5,7 @@ on:
   - pull_request
 
 env:
-  GRPC_VERSION: v1.58.0
+  GRPC_VERSION: v1.63.0
 
 permissions:
   contents: write
.github/workflows/test-extra.yml (51 changes, vendored)

@@ -34,7 +34,7 @@ jobs:
           sudo apt-get install -y conda
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
 
           sudo rm -rfv /usr/bin/conda || true

@@ -64,7 +64,7 @@ jobs:
           sudo apt-get install -y conda
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
 
           sudo rm -rfv /usr/bin/conda || true

@@ -74,6 +74,37 @@ jobs:
           make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
           make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
 
+
+  tests-rerankers:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg
+          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+          sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+          gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+          sudo apt-get update && \
+          sudo apt-get install -y conda
+          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+          sudo apt-get install -y libopencv-dev
+          pip install --user grpcio-tools==1.63.0
+
+          sudo rm -rfv /usr/bin/conda || true
+
+      - name: Test rerankers
+        run: |
+          export PATH=$PATH:/opt/conda/bin
+          make --jobs=5 --output-sync=target -C backend/python/rerankers
+          make --jobs=5 --output-sync=target -C backend/python/rerankers test
+
   tests-diffusers:
     runs-on: ubuntu-latest
     steps:

@@ -94,7 +125,7 @@ jobs:
           sudo apt-get install -y conda
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
 
           sudo rm -rfv /usr/bin/conda || true

@@ -124,7 +155,7 @@ jobs:
           sudo apt-get install -y conda
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
 
           sudo rm -rfv /usr/bin/conda || true

@@ -154,7 +185,7 @@ jobs:
           sudo apt-get install -y conda
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
 
           sudo rm -rfv /usr/bin/conda || true

@@ -186,7 +217,7 @@ jobs:
   #         sudo apt-get install -y conda
   #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
   #         sudo apt-get install -y libopencv-dev
-  #         pip install --user grpcio-tools
+  #         pip install --user grpcio-tools==1.63.0
 
   #         sudo rm -rfv /usr/bin/conda || true

@@ -258,7 +289,7 @@ jobs:
   #         sudo apt-get install -y conda
   #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
   #         sudo apt-get install -y libopencv-dev
-  #         pip install --user grpcio-tools
+  #         pip install --user grpcio-tools==1.63.0
 
   #         sudo rm -rfv /usr/bin/conda || true

@@ -291,7 +322,7 @@ jobs:
   #         sudo apt-get install -y conda
   #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
   #         sudo apt-get install -y libopencv-dev
-  #         pip install --user grpcio-tools
+  #         pip install --user grpcio-tools==1.63.0
   #         sudo rm -rfv /usr/bin/conda || true
   # - name: Test vllm
   #   run: |

@@ -318,7 +349,7 @@ jobs:
           sudo apt-get install -y conda
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
           sudo rm -rfv /usr/bin/conda || true
       - name: Test vall-e-x
         run: |

@@ -345,7 +376,7 @@ jobs:
           sudo apt-get update && \
           sudo apt-get install -y conda
           sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
           sudo rm -rfv /usr/bin/conda || true
 
       - name: Test coqui
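All of these hunks apply the same fix: the previously unpinned `pip install --user grpcio-tools` now tracks the v1.63.0 gRPC bump made elsewhere in this compare, so the generated Python stubs come from the same gRPC release the rest of the build uses. Reproducing the pinned install locally:

```sh
# Match the CI pin; the version mirrors GRPC_VERSION: v1.63.0.
pip install --user grpcio-tools==1.63.0
pip show grpcio-tools | grep '^Version'   # expect: Version: 1.63.0
```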
.github/workflows/test.yml (25 changes, vendored)

@@ -10,7 +10,7 @@ on:
       - '*'
 
 env:
-  GRPC_VERSION: v1.58.0
+  GRPC_VERSION: v1.63.0
 
 concurrency:
   group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}

@@ -121,8 +121,11 @@ jobs:
           PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3
-        timeout-minutes: 5
+        uses: mxschmitt/action-tmate@v3.18
+        with:
+          detached: true
+          connect-timeout-seconds: 180
+          limit-access-to-actor: true
 
   tests-aio-container:
     runs-on: ubuntu-latest

@@ -173,8 +176,11 @@ jobs:
           make run-e2e-aio
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3
-        timeout-minutes: 5
+        uses: mxschmitt/action-tmate@v3.18
+        with:
+          detached: true
+          connect-timeout-seconds: 180
+          limit-access-to-actor: true
 
   tests-apple:
     runs-on: macOS-14

@@ -197,7 +203,7 @@ jobs:
       - name: Dependencies
         run: |
           brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
       - name: Test
         run: |
           export C_INCLUDE_PATH=/usr/local/include

@@ -207,5 +213,8 @@ jobs:
           BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3
-        timeout-minutes: 5
+        uses: mxschmitt/action-tmate@v3.18
+        with:
+          detached: true
+          connect-timeout-seconds: 180
+          limit-access-to-actor: true
.github/workflows/update_swagger.yaml (31 changes, new file, vendored)

@@ -0,0 +1,31 @@
+name: Update swagger
+on:
+  schedule:
+    - cron: 0 20 * * *
+  workflow_dispatch:
+jobs:
+  swagger:
+    strategy:
+      fail-fast: false
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v5
+        with:
+          go-version: 'stable'
+      - run: |
+          go install github.com/swaggo/swag/cmd/swag@latest
+      - name: Bump swagger 🔧
+        run: |
+          make swagger
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v6
+        with:
+          token: ${{ secrets.UPDATE_BOT_TOKEN }}
+          push-to-fork: ci-forks/LocalAI
+          commit-message: 'feat(swagger): update swagger'
+          title: 'feat(swagger): update swagger'
+          branch: "update/swagger"
+          body: Update swagger
+          signoff: true
.github/workflows/yaml-check.yml (18 changes, new file, vendored)

@@ -0,0 +1,18 @@
+name: 'Yamllint GitHub Actions'
+on:
+  - pull_request
+jobs:
+  yamllint:
+    name: 'Yamllint'
+    runs-on: ubuntu-latest
+    steps:
+      - name: 'Checkout'
+        uses: actions/checkout@master
+      - name: 'Yamllint'
+        uses: karancode/yamllint-github-action@master
+        with:
+          yamllint_file_or_dir: 'gallery'
+          yamllint_strict: false
+          yamllint_comment: true
+        env:
+          GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
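The same check can be reproduced before pushing by running yamllint locally against the directory the workflow targets (non-strict, matching `yamllint_strict: false`, so warnings are reported without failing):

```sh
# Lint the gallery YAML files locally, as the new PR check does.
pip install yamllint
yamllint gallery/
```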
.gitignore (3 changes, vendored)

@@ -44,3 +44,6 @@ prepare
 *.pb.go
 *pb2.py
 *pb2_grpc.py
+
+# SonarQube
+.scannerwork
Dockerfile (209 changes)

@@ -1,41 +1,43 @@
 ARG IMAGE_TYPE=extras
 ARG BASE_IMAGE=ubuntu:22.04
+ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
 
-# extras or core
-FROM ${BASE_IMAGE} as requirements-core
+# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
+FROM ${BASE_IMAGE} AS requirements-core
 
 USER root
 
 ARG GO_VERSION=1.21.7
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=11
 ARG CUDA_MINOR_VERSION=7
 ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV BUILD_TYPE=${BUILD_TYPE}
 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
 
 ARG GO_TAGS="stablediffusion tinydream tts"
 
 RUN apt-get update && \
-    apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        cmake \
+        curl \
+        git \
+        python3-pip \
+        python-is-python3 \
+        unzip && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip install --upgrade pip
 
 # Install Go
-RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz
-ENV PATH $PATH:/usr/local/go/bin
+RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
+ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
 
 # Install grpc compilers
-ENV PATH $PATH:/root/go/bin
 RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \
     go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
 
 # Install protobuf (the version in 22.04 is too old)
 RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
     unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
     rm protoc.zip
 
 # Install grpcio-tools (the version in 22.04 is too old)
 RUN pip install --user grpcio-tools

@@ -46,16 +48,6 @@ RUN update-ca-certificates
 RUN echo "Target Architecture: $TARGETARCH"
 RUN echo "Target Variant: $TARGETVARIANT"
 
-# CuBLAS requirements
-RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
-    apt-get install -y software-properties-common && \
-    curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
-    dpkg -i cuda-keyring_1.1-1_all.deb && \
-    rm -f cuda-keyring_1.1-1_all.deb && \
-    apt-get update && \
-    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
-    ; fi
-
 # Cuda
 ENV PATH /usr/local/cuda/bin:${PATH}

@@ -63,10 +55,12 @@ ENV PATH /usr/local/cuda/bin:${PATH}
 ENV PATH /opt/rocm/bin:${PATH}
 
 # OpenBLAS requirements and stable diffusion
-RUN apt-get install -y \
-    libopenblas-dev \
-    libopencv-dev \
-    && apt-get clean
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        libopenblas-dev \
+        libopencv-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
 # Set up OpenCV
 RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2

@@ -79,57 +73,114 @@ RUN test -n "$TARGETARCH"
 ###################################
 ###################################
 
-FROM requirements-core as requirements-extras
+# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
+FROM requirements-core AS requirements-extras
 
-RUN apt install -y gpg && \
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends gpg && \
     curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
     install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
     gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
+    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
     apt-get update && \
-    apt-get install -y conda && apt-get clean
+    apt-get install -y --no-install-recommends \
+        conda && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
 ENV PATH="/root/.cargo/bin:${PATH}"
-RUN apt-get install -y python3-pip && apt-get clean
-RUN pip install --upgrade pip
 
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
-RUN apt-get install -y espeak-ng espeak && apt-get clean
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        espeak-ng \
+        espeak && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
-RUN if [ ! -e /usr/bin/python ]; then \
-    ln -s /usr/bin/python3 /usr/bin/python \
-    ; fi
-
 ###################################
 ###################################
 
+# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
+# This target will be built on top of requirements-core or requirements-extras as retermined by the IMAGE_TYPE build-arg
+FROM requirements-${IMAGE_TYPE} AS requirements-drivers
+
+ARG BUILD_TYPE
+ARG CUDA_MAJOR_VERSION=11
+ARG CUDA_MINOR_VERSION=7
+
+ENV BUILD_TYPE=${BUILD_TYPE}
+
+# CuBLAS requirements
+RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            software-properties-common && \
+        curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
+        dpkg -i cuda-keyring_1.1-1_all.deb && \
+        rm -f cuda-keyring_1.1-1_all.deb && \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/* \
+    ; fi
+
+# If we are building with clblas support, we need the libraries for the builds
+RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            libclblast-dev && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/* \
+    ; fi
+
 ###################################
 ###################################
 
-FROM ${BASE_IMAGE} as grpc
+# The grpc target does one thing, it builds and installs GRPC. This is in it's own layer so that it can be effectively cached by CI.
+# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
+FROM ${GRPC_BASE_IMAGE} AS grpc
 
-ARG MAKEFLAGS
+# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
+ARG GRPC_MAKEFLAGS="-j4 -Otarget"
 ARG GRPC_VERSION=v1.58.0
 
-ENV MAKEFLAGS=${MAKEFLAGS}
+ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
 
 WORKDIR /build
 
 RUN apt-get update && \
-    apt-get install -y build-essential cmake git && \
+    apt-get install -y --no-install-recommends \
+        ca-certificates \
+        build-essential \
+        cmake \
+        git && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc
-
-RUN cd grpc && \
-    mkdir -p cmake/build && \
-    cd cmake/build && \
-    cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
-    make
+# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
+# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
+# and running make install in the target container
+RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+    mkdir -p /build/grpc/cmake/build && \
+    cd /build/grpc/cmake/build && \
+    cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
+    make && \
+    make install && \
+    rm -rf /build
 
 ###################################
 ###################################
 
-FROM requirements-${IMAGE_TYPE} as builder
+# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
+# Adjustments to the build process should likely be made here.
+FROM requirements-drivers AS builder
 
 ARG GO_TAGS="stablediffusion tts"
 ARG GRPC_BACKENDS

@@ -148,39 +199,36 @@ COPY . .
 COPY .git .
 RUN echo "GO_TAGS: $GO_TAGS"
 
 RUN apt-get update && \
     apt-get install -y build-essential cmake git && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
 RUN make prepare
 
-# If we are building with clblas support, we need the libraries for the builds
-RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
-    apt-get update && \
-    apt-get install -y libclblast-dev && \
-    apt-get clean \
-    ; fi
+# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
+# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
+# here so that we can generate the grpc code for the stablediffusion build
+RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
+    unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
+    rm protoc.zip
 
 # stablediffusion does not tolerate a newer version of abseil, build it first
 RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
 
-COPY --from=grpc /build/grpc ./grpc/
-
-RUN cd /build/grpc/cmake/build && make install
+# Install the pre-built GRPC
+COPY --from=grpc /opt/grpc /usr/local
 
 # Rebuild with defaults backends
 WORKDIR /build
 RUN make build
 
 RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
-    mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
-    touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
+      mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
+      touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
     ; fi
 
 ###################################
 ###################################
 
-FROM requirements-${IMAGE_TYPE}
+# This is the final target. The result of this target will be the image uploaded to the registry.
+# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
+FROM requirements-drivers
 
 ARG FFMPEG
 ARG BUILD_TYPE

@@ -201,21 +249,13 @@ ENV PIP_CACHE_PURGE=true
 # Add FFmpeg
 RUN if [ "${FFMPEG}" = "true" ]; then \
-    apt-get install -y ffmpeg && apt-get clean \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            ffmpeg && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/* \
     ; fi
 
-# Add OpenCL
-RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
-    apt-get update && \
-    apt-get install -y libclblast1 && \
-    apt-get clean \
-    ; fi
-
 RUN apt-get update && \
     apt-get install -y cmake git && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
 WORKDIR /build
 
 # we start fresh & re-copy all assets because `make build` does not clean up nicely after itself

@@ -225,9 +265,9 @@ WORKDIR /build
 COPY . .
 
 COPY --from=builder /build/sources ./sources/
-COPY --from=grpc /build/grpc ./grpc/
+COPY --from=grpc /opt/grpc /usr/local
 
-RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf /build/grpc
+RUN make prepare-sources
 
 # Copy the binary
 COPY --from=builder /build/local-ai ./

@@ -257,6 +297,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
     make -C backend/python/sentencetransformers \
     ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    make -C backend/python/rerankers \
+    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
     make -C backend/python/transformers \
     ; fi

@@ -287,7 +330,7 @@ RUN mkdir -p /build/models
 # Define the health check command
 HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
-  CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
+  CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
 
 VOLUME /build/models
 EXPOSE 8080
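The widened `EXTERNAL_GRPC_BACKENDS` line is what actually wires the new rerankers backend into the image: the variable is a comma-separated list of `name:/path/to/run.sh` entries. A sketch of registering just that backend against a locally built binary (the path follows the in-image layout; a local checkout would substitute its own):

```sh
# Expose only the rerankers backend to a locally built local-ai binary.
EXTERNAL_GRPC_BACKENDS="rerankers:/build/backend/python/rerankers/run.sh" \
  ./local-ai
```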
Makefile (91 changes)

@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=7593639ce335e8d7f89aa9a54d616951f273af60
+CPPLLAMA_VERSION?=6ecf3189e00a1e8e737a78b6d10e1d7006e050a2
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
 
 # whisper.cpp version
-WHISPER_CPP_VERSION?=b0c3cbf2e851cf232e432b590dcc514a689ec028
+WHISPER_CPP_VERSION?=8fac6455ffeb0a0950a84e790ddb74f7290d33c4
 
 # bert.cpp version
 BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d

@@ -25,10 +25,10 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
 PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
 
 # stablediffusion version
-STABLEDIFFUSION_VERSION?=362df9da29f882dbf09ade61972d16a1f53c3485
+STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
 
 # tinydream version
-TINYDREAM_VERSION?=22a12a4bc0ac5455856f28f3b771331a551a4293
+TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
 
 export BUILD_TYPE?=
 export STABLE_BUILD_TYPE?=$(BUILD_TYPE)

@@ -99,7 +99,7 @@ endif
 ifeq ($(BUILD_TYPE),cublas)
 	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
 	export LLAMA_CUBLAS=1
-	export WHISPER_CUBLAS=1
+	export WHISPER_CUDA=1
 	CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda
 endif

@@ -179,20 +179,20 @@ endif
 all: help
 
 ## BERT embeddings
-sources/go-bert:
-	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
-	cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
+sources/go-bert.cpp:
+	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp
+	cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
 
-sources/go-bert/libgobert.a: sources/go-bert
-	$(MAKE) -C sources/go-bert libgobert.a
+sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
+	$(MAKE) -C sources/go-bert.cpp libgobert.a
 
-## go-llama-ggml
-sources/go-llama-ggml:
-	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
-	cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
+## go-llama.cpp
+sources/go-llama.cpp:
+	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp
+	cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
 
-sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
-	$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
+sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
+	$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
 
 ## go-piper
 sources/go-piper:

@@ -211,12 +211,12 @@ sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
 	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
 
 ## RWKV
-sources/go-rwkv:
-	git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv
-	cd sources/go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
+sources/go-rwkv.cpp:
+	git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp
+	cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
 
-sources/go-rwkv/librwkv.a: sources/go-rwkv
-	cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
+sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
+	cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..

@@ -236,23 +236,24 @@ sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
 ## whisper
 sources/whisper.cpp:
-	git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
+	git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp
 	cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
 
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
-	cd sources/whisper.cpp && make libwhisper.a
+	cd sources/whisper.cpp && $(MAKE) libwhisper.a
 
-get-sources: sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
+get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
 
 replace:
-	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
+	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
 	$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
 	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
 	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
 	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
 
 dropreplace:
 	$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp

@@ -271,12 +272,12 @@ prepare-sources: get-sources replace
 ## GENERIC
 rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
-	$(MAKE) -C sources/go-llama-ggml clean
+	$(MAKE) -C sources/go-llama.cpp clean
 	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
-	$(MAKE) -C sources/go-rwkv clean
+	$(MAKE) -C sources/go-rwkv.cpp clean
 	$(MAKE) -C sources/whisper.cpp clean
 	$(MAKE) -C sources/go-stable-diffusion clean
-	$(MAKE) -C sources/go-bert clean
+	$(MAKE) -C sources/go-bert.cpp clean
 	$(MAKE) -C sources/go-piper clean
 	$(MAKE) -C sources/go-tiny-dream clean
 	$(MAKE) build

@@ -436,10 +437,10 @@ protogen-go-clean:
 	$(RM) bin/*
 
 .PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
 
 .PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
 
 .PHONY: autogptq-protogen
 autogptq-protogen:

@@ -505,6 +506,14 @@ petals-protogen:
 petals-protogen-clean:
 	$(MAKE) -C backend/python/petals protogen-clean
 
+.PHONY: rerankers-protogen
+rerankers-protogen:
+	$(MAKE) -C backend/python/rerankers protogen
+
+.PHONY: rerankers-protogen-clean
+rerankers-protogen-clean:
+	$(MAKE) -C backend/python/rerankers protogen-clean
+
 .PHONY: sentencetransformers-protogen
 sentencetransformers-protogen:
 	$(MAKE) -C backend/python/sentencetransformers protogen

@@ -563,6 +572,7 @@ prepare-extra-conda-environments: protogen-python
 	$(MAKE) -C backend/python/vllm
 	$(MAKE) -C backend/python/mamba
 	$(MAKE) -C backend/python/sentencetransformers
+	$(MAKE) -C backend/python/rerankers
 	$(MAKE) -C backend/python/transformers
 	$(MAKE) -C backend/python/transformers-musicgen
 	$(MAKE) -C backend/python/parler-tts

@@ -598,8 +608,8 @@ backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/
 backend-assets/grpc: protogen-go replace
 	mkdir -p backend-assets/grpc
 
-backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
+backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
 
 backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc

@@ -641,17 +651,16 @@ ifeq ($(BUILD_TYPE),metal)
 	cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
 endif
 
-backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libbinding.a backend-assets/grpc
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
+backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
 
 backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
 	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
 
-backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
+backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
 
 backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc

@@ -698,7 +707,7 @@ docker-aio-all:
 docker-image-intel:
 	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="none" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \

@@ -706,7 +715,7 @@ docker-image-intel:
 docker-image-intel-xpu:
 	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="none" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \

@@ -714,4 +723,4 @@ docker-image-intel-xpu:
 .PHONY: swagger
 swagger:
-	swag init -g core/http/api.go --output swagger
+	swag init -g core/http/app.go --output swagger
12
README.md
12
README.md
@@ -44,20 +44,23 @@

[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)

**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic...) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU.
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic...) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).

## 🔥🔥 Hot topics / Roadmap

[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)

- Reranker API: https://github.com/mudler/LocalAI/pull/2121
- Gallery WebUI: https://github.com/mudler/LocalAI/pull/2104
- llama3: https://github.com/mudler/LocalAI/discussions/2076
- Parler-TTS: https://github.com/mudler/LocalAI/pull/2027
- Landing page: https://github.com/mudler/LocalAI/pull/1922
- Openvino support: https://github.com/mudler/LocalAI/pull/1892
- Vector store: https://github.com/mudler/LocalAI/pull/1795
- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 / Tools API support: https://github.com/mudler/LocalAI/pull/1715

Hot topics (looking for contributors):

- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
@@ -88,7 +91,8 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
- 🖼️ [Download Models directly from Huggingface](https://localai.io/models/)
- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 🆕 [Reranker API](https://localai.io/features/reranker/)

## 💻 Usage
27
aio/cpu/rerank.yaml
Normal file
27
aio/cpu/rerank.yaml
Normal file
@@ -0,0 +1,27 @@
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
  model: cross-encoder

usage: |
    You can test this model with curl like this:

    curl http://localhost:8080/v1/rerank \
      -H "Content-Type: application/json" \
      -d '{
        "model": "jina-reranker-v1-base-en",
        "query": "Organic skincare products for sensitive skin",
        "documents": [
          "Eco-friendly kitchenware for modern homes",
          "Biodegradable cleaning supplies for eco-conscious consumers",
          "Organic cotton baby clothes for sensitive skin",
          "Natural organic skincare range for sensitive skin",
          "Tech gadgets for smart homes: 2024 edition",
          "Sustainable gardening tools and compost solutions",
          "Sensitive skin-friendly facial cleansers and toners",
          "Organic food wraps and storage solutions",
          "All-natural pet food for dogs with allergies",
          "Yoga mats made from recycled materials"
        ],
        "top_n": 3
      }'
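For readers who prefer Go over curl, a minimal client sketch against the same endpoint follows. The request fields mirror the curl payload above, and the response fields mirror the DocumentResult message added to backend.proto later in this compare; the exact response envelope is an assumption of this sketch, not a published client API.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// Shapes assumed from the curl example above and the proto messages below.
type rerankRequest struct {
	Model     string   `json:"model"`
	Query     string   `json:"query"`
	Documents []string `json:"documents"`
	TopN      int      `json:"top_n"`
}

type rerankResponse struct {
	Results []struct {
		Index          int     `json:"index"`
		Text           string  `json:"text"`
		RelevanceScore float64 `json:"relevance_score"`
	} `json:"results"`
}

func main() {
	body, _ := json.Marshal(rerankRequest{
		Model:     "jina-reranker-v1-base-en",
		Query:     "Organic skincare products for sensitive skin",
		Documents: []string{"Natural organic skincare range for sensitive skin", "Yoga mats made from recycled materials"},
		TopN:      1,
	})
	resp, err := http.Post("http://localhost:8080/v1/rerank", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	var out rerankResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	for _, r := range out.Results {
		// Print the surviving documents ordered by relevance.
		fmt.Printf("%d %.3f %s\n", r.Index, r.RelevanceScore, r.Text)
	}
}
```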
@@ -1,20 +1,27 @@
name: gpt-4
mmap: true
parameters:
  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf
  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf

template:
  chat_message: |
    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
    {{- if .FunctionCall }}<tool_call>{{end}}
    {{- if eq .RoleName "tool" }}<tool_result>{{end }}
    {{- if .Content}}
    {{.Content}}
    {{- if .FunctionCall }}
    <tool_call>
    {{- else if eq .RoleName "tool" }}
    <tool_response>
    {{- end }}
    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
    {{- if .FunctionCall }}</tool_call>{{end }}
    {{- if eq .RoleName "tool" }}</tool_result>{{end }}
    <|im_end|>
    {{- if .Content}}
    {{.Content }}
    {{- end }}
    {{- if .FunctionCall}}
    {{toJson .FunctionCall}}
    {{- end }}
    {{- if .FunctionCall }}
    </tool_call>
    {{- else if eq .RoleName "tool" }}
    </tool_response>
    {{- end }}<|im_end|>
  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
  function: |
    <|im_start|>system
@@ -29,8 +36,7 @@ template:
    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
    <tool_call>
    {'arguments': <args-dict>, 'name': <function-name>}
    </tool_call>
    <|im_end|>
    </tool_call><|im_end|>
    {{.Input -}}
    <|im_start|>assistant
    <tool_call>
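The chat_message block above is a Go text/template. A self-contained sketch of how such a template renders a function call, using a hypothetical message struct and a locally registered toJson helper (a simplification of LocalAI's real template wiring, not its actual engine):

```go
package main

import (
	"encoding/json"
	"os"
	"text/template"
)

// Hypothetical message shape mirroring the fields the template refers to.
type msg struct {
	RoleName     string
	Content      string
	FunctionCall map[string]any
}

// A trimmed-down version of the chat_message template above.
const chatMessage = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "user"}}user{{end}}
{{- if .Content}}
{{.Content}}
{{- end }}
{{- if .FunctionCall}}
<tool_call>
{{toJson .FunctionCall}}
</tool_call>
{{- end }}<|im_end|>`

func main() {
	t := template.Must(template.New("chat").Funcs(template.FuncMap{
		// toJson is registered locally here; LocalAI provides its own helper.
		"toJson": func(v any) string { b, _ := json.Marshal(v); return string(b) },
	}).Parse(chatMessage))
	// Renders: <|im_start|>assistant\n<tool_call>\n{...}\n</tool_call><|im_end|>
	_ = t.Execute(os.Stdout, msg{
		RoleName:     "assistant",
		FunctionCall: map[string]any{"name": "get_weather", "arguments": map[string]any{"city": "Rome"}},
	})
}
```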
@@ -129,7 +129,7 @@ detect_gpu
detect_gpu_size

PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"

check_vars
27
aio/gpu-8g/rerank.yaml
Normal file
27
aio/gpu-8g/rerank.yaml
Normal file
@@ -0,0 +1,27 @@
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
  model: cross-encoder

usage: |
    You can test this model with curl like this:

    curl http://localhost:8080/v1/rerank \
      -H "Content-Type: application/json" \
      -d '{
        "model": "jina-reranker-v1-base-en",
        "query": "Organic skincare products for sensitive skin",
        "documents": [
          "Eco-friendly kitchenware for modern homes",
          "Biodegradable cleaning supplies for eco-conscious consumers",
          "Organic cotton baby clothes for sensitive skin",
          "Natural organic skincare range for sensitive skin",
          "Tech gadgets for smart homes: 2024 edition",
          "Sustainable gardening tools and compost solutions",
          "Sensitive skin-friendly facial cleansers and toners",
          "Organic food wraps and storage solutions",
          "All-natural pet food for dogs with allergies",
          "Yoga mats made from recycled materials"
        ],
        "top_n": 3
      }'
@@ -1,20 +1,27 @@
name: gpt-4
mmap: true
parameters:
  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf

template:
  chat_message: |
    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
    {{- if .FunctionCall }}<tool_call>{{end}}
    {{- if eq .RoleName "tool" }}<tool_result>{{end }}
    {{- if .Content}}
    {{.Content}}
    {{- if .FunctionCall }}
    <tool_call>
    {{- else if eq .RoleName "tool" }}
    <tool_response>
    {{- end }}
    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
    {{- if .FunctionCall }}</tool_call>{{end }}
    {{- if eq .RoleName "tool" }}</tool_result>{{end }}
    <|im_end|>
    {{- if .Content}}
    {{.Content }}
    {{- end }}
    {{- if .FunctionCall}}
    {{toJson .FunctionCall}}
    {{- end }}
    {{- if .FunctionCall }}
    </tool_call>
    {{- else if eq .RoleName "tool" }}
    </tool_response>
    {{- end }}<|im_end|>
  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
  function: |
    <|im_start|>system
@@ -29,8 +36,7 @@ template:
    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
    <tool_call>
    {'arguments': <args-dict>, 'name': <function-name>}
    </tool_call>
    <|im_end|>
    </tool_call><|im_end|>
    {{.Input -}}
    <|im_start|>assistant
    <tool_call>
27
aio/intel/rerank.yaml
Normal file
27
aio/intel/rerank.yaml
Normal file
@@ -0,0 +1,27 @@
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
  model: cross-encoder

usage: |
    You can test this model with curl like this:

    curl http://localhost:8080/v1/rerank \
      -H "Content-Type: application/json" \
      -d '{
        "model": "jina-reranker-v1-base-en",
        "query": "Organic skincare products for sensitive skin",
        "documents": [
          "Eco-friendly kitchenware for modern homes",
          "Biodegradable cleaning supplies for eco-conscious consumers",
          "Organic cotton baby clothes for sensitive skin",
          "Natural organic skincare range for sensitive skin",
          "Tech gadgets for smart homes: 2024 edition",
          "Sustainable gardening tools and compost solutions",
          "Sensitive skin-friendly facial cleansers and toners",
          "Organic food wraps and storage solutions",
          "All-natural pet food for dogs with allergies",
          "Yoga mats made from recycled materials"
        ],
        "top_n": 3
      }'
@@ -2,20 +2,27 @@ name: gpt-4
mmap: false
f16: false
parameters:
  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf

template:
  chat_message: |
    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
    {{- if .FunctionCall }}<tool_call>{{end}}
    {{- if eq .RoleName "tool" }}<tool_result>{{end }}
    {{- if .Content}}
    {{.Content}}
    {{- if .FunctionCall }}
    <tool_call>
    {{- else if eq .RoleName "tool" }}
    <tool_response>
    {{- end }}
    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
    {{- if .FunctionCall }}</tool_call>{{end }}
    {{- if eq .RoleName "tool" }}</tool_result>{{end }}
    <|im_end|>
    {{- if .Content}}
    {{.Content }}
    {{- end }}
    {{- if .FunctionCall}}
    {{toJson .FunctionCall}}
    {{- end }}
    {{- if .FunctionCall }}
    </tool_call>
    {{- else if eq .RoleName "tool" }}
    </tool_response>
    {{- end }}<|im_end|>
  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
  function: |
    <|im_start|>system
@@ -30,8 +37,7 @@ template:
    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
    <tool_call>
    {'arguments': <args-dict>, 'name': <function-name>}
    </tool_call>
    <|im_end|>
    </tool_call><|im_end|>
    {{.Input -}}
    <|im_start|>assistant
    <tool_call>
@@ -23,6 +23,30 @@ service Backend {
  rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
  rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
  rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}

  rpc Rerank(RerankRequest) returns (RerankResult) {}
}

message RerankRequest {
  string query = 1;
  repeated string documents = 2;
  int32 top_n = 3;
}

message RerankResult {
  Usage usage = 1;
  repeated DocumentResult results = 2;
}

message Usage {
  int32 total_tokens = 1;
  int32 prompt_tokens = 2;
}

message DocumentResult {
  int32 index = 1;
  string text = 2;
  float relevance_score = 3;
}

message StoresKey {
@@ -177,6 +201,7 @@ message ModelOptions {
  bool EnforceEager = 52;
  int32 SwapSpace = 53;
  int32 MaxModelLen = 54;
  int32 TensorParallelSize = 55;

  string MMProj = 41;
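Given these messages, the generated Go stubs should expose the RPC roughly as below. This is a minimal sketch that assumes the pkg/grpc/proto import path used elsewhere in this compare and a rerankers backend already listening on the address; it is not an official client.

```go
package main

import (
	"context"
	"fmt"

	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	// Plaintext connection to a locally running backend (assumed address).
	conn, err := grpc.Dial("localhost:50051", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	client := pb.NewBackendClient(conn)
	res, err := client.Rerank(context.Background(), &pb.RerankRequest{
		Query:     "I love you",
		Documents: []string{"I hate you", "I really like you"},
		TopN:      2,
	})
	if err != nil {
		panic(err)
	}
	for _, r := range res.Results {
		// DocumentResult fields as defined in the proto above.
		fmt.Println(r.Index, r.RelevanceScore, r.Text)
	}
}
```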
@@ -11,8 +11,8 @@ import (
    "github.com/go-skynet/LocalAI/core/schema"
)

func runCommand(command []string) (string, error) {
    cmd := exec.Command(command[0], command[1:]...)
func ffmpegCommand(args []string) (string, error) {
    cmd := exec.Command("ffmpeg", args...) // Constrain this to ffmpeg to permit security scanner to see that the command is safe.
    cmd.Env = os.Environ()
    out, err := cmd.CombinedOutput()
    return string(out), err
@@ -21,16 +21,16 @@ func runCommand(command []string) (string, error) {
// AudioToWav converts audio to wav for transcribe.
// TODO: use https://github.com/mccoyst/ogg?
func audioToWav(src, dst string) error {
    command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
    out, err := runCommand(command)
    commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
    out, err := ffmpegCommand(commandArgs)
    if err != nil {
        return fmt.Errorf("error: %w out: %s", err, out)
    }
    return nil
}

func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) {
    res := schema.Result{}
func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) {
    res := schema.TranscriptionResult{}

    dir, err := os.MkdirTemp("", "whisper")
    if err != nil {
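The runCommand to ffmpegCommand rename above is a deliberate hardening: with the executable name a compile-time constant, only the arguments vary with input, so a static analyzer can verify that untrusted data never selects the binary being executed. A self-contained sketch of the same pattern, with hypothetical file paths:

```go
package main

import (
	"fmt"
	"os/exec"
)

// convertToWav shells out to ffmpeg; the binary name is a constant, so
// only the argument list depends on caller input.
func convertToWav(src, dst string) (string, error) {
	args := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
	out, err := exec.Command("ffmpeg", args...).CombinedOutput()
	return string(out), err
}

func main() {
	out, err := convertToWav("in.ogg", "out.wav") // hypothetical paths
	fmt.Println(out, err)
}
```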
@@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
    return err
}

func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) {
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
    return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
}
@@ -41,7 +41,7 @@ dependencies:
  - filelock==3.12.4
  - frozenlist==1.4.0
  - fsspec==2023.6.0
  - grpcio==1.59.0
  - grpcio==1.63.0
  - huggingface-hub==0.16.4
  - idna==3.4
  - jinja2==3.1.2

@@ -26,7 +26,7 @@ if [ -d "/opt/intel" ]; then
    # Intel GPU: If the directory exists, we assume we are using the intel image
    # (no conda env)
    # https://github.com/intel/intel-extension-for-pytorch/issues/538
    pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino]
    pip install torch==2.1.0.post0 torchvision==0.16.0.post0 torchaudio==2.1.0.post0 intel-extension-for-pytorch==2.1.20+xpu oneccl_bind_pt==2.1.200+xpu intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
fi

# If we didn't skip conda, activate the environment

@@ -47,7 +47,7 @@ dependencies:
  - frozenlist==1.4.0
  - fsspec==2023.6.0
  - funcy==2.0
  - grpcio==1.59.0
  - grpcio==1.63.0
  - huggingface-hub
  - idna==3.4
  - jinja2==3.1.2
@@ -120,4 +120,6 @@ dependencies:
  - transformers>=4.38.2 # Updated Version
  - transformers_stream_generator==0.0.5
  - xformers==0.0.23.post1
  - rerankers[transformers]
  - pydantic
prefix: /opt/conda/envs/transformers

@@ -48,7 +48,7 @@ dependencies:
  - frozenlist==1.4.0
  - fsspec==2023.6.0
  - funcy==2.0
  - grpcio==1.59.0
  - grpcio==1.63.0
  - huggingface-hub
  - idna==3.4
  - jinja2==3.1.2
@@ -108,4 +108,6 @@ dependencies:
  - transformers>=4.38.2 # Updated Version
  - transformers_stream_generator==0.0.5
  - xformers==0.0.23.post1
  - rerankers[transformers]
  - pydantic
prefix: /opt/conda/envs/transformers

@@ -47,7 +47,7 @@ dependencies:
  - frozenlist==1.4.0
  - fsspec==2023.6.0
  - funcy==2.0
  - grpcio==1.59.0
  - grpcio==1.63.0
  - huggingface-hub
  - humanfriendly==10.0
  - idna==3.4
@@ -60,9 +60,10 @@ dependencies:
  - networkx
  - numpy==1.26.0
  - onnx==1.15.0
  - openvino==2024.0.0
  - openvino-telemetry==2023.2.1
  - optimum[openvino]==1.17.1
  - openvino==2024.1.0
  - openvino-telemetry==2024.1.0
  - optimum[openvino]==1.19.1
  - optimum-intel==1.16.1
  - packaging==23.2
  - pandas
  - peft==0.5.0
@@ -111,5 +112,7 @@ dependencies:
  - vllm>=0.4.0
  - transformers>=4.38.2 # Updated Version
  - transformers_stream_generator==0.0.5
  - xformers==0.0.23.post1
  - xformers==0.0.23.post1
  - rerankers[transformers]
  - pydantic
prefix: /opt/conda/envs/transformers

@@ -34,7 +34,7 @@ dependencies:
  - diffusers==0.24.0
  - filelock==3.12.4
  - fsspec==2023.9.2
  - grpcio==1.59.0
  - grpcio==1.63.0
  - huggingface-hub>=0.19.4
  - idna==3.4
  - importlib-metadata==6.8.0
@@ -61,4 +61,5 @@ dependencies:
  - urllib3==2.0.6
  - zipp==3.17.0
  - torch
  - opencv-python
prefix: /opt/conda/envs/diffusers

@@ -32,7 +32,7 @@ dependencies:
  - diffusers==0.24.0
  - filelock==3.12.4
  - fsspec==2023.9.2
  - grpcio==1.59.0
  - grpcio==1.63.0
  - huggingface-hub>=0.19.4
  - idna==3.4
  - importlib-metadata==6.8.0
@@ -71,4 +71,5 @@ dependencies:
  - typing-extensions==4.8.0
  - urllib3==2.0.6
  - zipp==3.17.0
  - opencv-python
prefix: /opt/conda/envs/diffusers

@@ -31,8 +31,8 @@ if [ -d "/opt/intel" ]; then
        --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

    pip install google-api-python-client \
        grpcio \
        grpcio-tools \
        grpcio==1.63.0 \
        grpcio-tools==1.63.0 \
        diffusers==0.24.0 \
        transformers>=4.25.1 \
        accelerate \

@@ -27,7 +27,7 @@ dependencies:
  - pip:
    - filelock==3.12.4
    - fsspec==2023.9.2
    - grpcio==1.59.0
    - grpcio==1.63.0
    - jinja2==3.1.2
    - markupsafe==2.1.3
    - mpmath==1.3.0

@@ -27,7 +27,7 @@ dependencies:
  - pip:
    - filelock==3.12.4
    - fsspec==2023.9.2
    - grpcio==1.59.0
    - grpcio==1.63.0
    - markupsafe==2.1.3
    - mpmath==1.3.0
    - networkx==3.1

@@ -26,7 +26,7 @@ dependencies:
  - zlib=1.2.13=h5eee18b_0
  - pip:
    - accelerate>=0.11.0
    - grpcio==1.59.0
    - grpcio==1.63.0
    - numpy==1.26.0
    - nvidia-cublas-cu12==12.1.3.1
    - nvidia-cuda-cupti-cu12==12.1.105

@@ -27,7 +27,7 @@ dependencies:
  - pip:
    - accelerate>=0.11.0
    - numpy==1.26.0
    - grpcio==1.59.0
    - grpcio==1.63.0
    - torch==2.1.0
    - transformers>=4.34.0
    - descript-audio-codec
27
backend/python/rerankers/Makefile
Normal file
27
backend/python/rerankers/Makefile
Normal file
@@ -0,0 +1,27 @@
.PHONY: rerankers
rerankers: protogen
    $(MAKE) -C ../common-env/transformers

.PHONY: run
run: protogen
    @echo "Running rerankers..."
    bash run.sh
    @echo "rerankers run."

# This does not work well from the command line; it only works with an IDE like VSCode.
.PHONY: test
test: protogen
    @echo "Testing rerankers..."
    bash test.sh
    @echo "rerankers tested."

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
    $(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
    python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
5
backend/python/rerankers/README.md
Normal file
5
backend/python/rerankers/README.md
Normal file
@@ -0,0 +1,5 @@
# Creating a separate environment for the reranker project

```
make rerankers
```
123
backend/python/rerankers/reranker.py
Executable file
123
backend/python/rerankers/reranker.py
Executable file
@@ -0,0 +1,123 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Extra gRPC server for Rerankers models.
|
||||
"""
|
||||
from concurrent import futures
|
||||
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
from rerankers import Reranker
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
"""
|
||||
A gRPC servicer for the backend service.
|
||||
|
||||
This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
|
||||
"""
|
||||
def Health(self, request, context):
|
||||
"""
|
||||
A gRPC method that returns the health status of the backend service.
|
||||
|
||||
Args:
|
||||
request: A HealthRequest object that contains the request parameters.
|
||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
||||
|
||||
Returns:
|
||||
A Reply object that contains the health status of the backend service.
|
||||
"""
|
||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||
|
||||
def LoadModel(self, request, context):
|
||||
"""
|
||||
A gRPC method that loads a model into memory.
|
||||
|
||||
Args:
|
||||
request: A LoadModelRequest object that contains the request parameters.
|
||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
||||
|
||||
Returns:
|
||||
A Result object that contains the result of the LoadModel operation.
|
||||
"""
|
||||
model_name = request.Model
|
||||
try:
|
||||
kwargs = {}
|
||||
if request.Type != "":
|
||||
kwargs['model_type'] = request.Type
|
||||
if request.PipelineType != "": # Reuse the PipelineType field for language
|
||||
kwargs['lang'] = request.PipelineType
|
||||
self.model_name = model_name
|
||||
self.model = Reranker(model_name, **kwargs)
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
|
||||
# Implement your logic here for the LoadModel service
|
||||
# Replace this with your desired response
|
||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||
|
||||
def Rerank(self, request, context):
|
||||
documents = []
|
||||
for idx, doc in enumerate(request.documents):
|
||||
documents.append(doc)
|
||||
ranked_results=self.model.rank(query=request.query, docs=documents, doc_ids=list(range(len(request.documents))))
|
||||
# Prepare results to return
|
||||
results = [
|
||||
backend_pb2.DocumentResult(
|
||||
index=res.doc_id,
|
||||
text=res.text,
|
||||
relevance_score=res.score
|
||||
) for res in ranked_results.results
|
||||
]
|
||||
|
||||
# Calculate the usage and total tokens
|
||||
# TODO: Implement the usage calculation with reranker
|
||||
total_tokens = sum(len(doc.split()) for doc in request.documents) + len(request.query.split())
|
||||
prompt_tokens = len(request.query.split())
|
||||
usage = backend_pb2.Usage(total_tokens=total_tokens, prompt_tokens=prompt_tokens)
|
||||
return backend_pb2.RerankResult(usage=usage, results=results)
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
print("Server started. Listening on: " + address, file=sys.stderr)
|
||||
|
||||
# Define the signal handler function
|
||||
def signal_handler(sig, frame):
|
||||
print("Received termination signal. Shutting down...")
|
||||
server.stop(0)
|
||||
sys.exit(0)
|
||||
|
||||
# Set the signal handlers for SIGINT and SIGTERM
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
|
||||
try:
|
||||
while True:
|
||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
||||
except KeyboardInterrupt:
|
||||
server.stop(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
||||
parser.add_argument(
|
||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
serve(args.addr)
|
||||
14
backend/python/rerankers/run.sh
Executable file
14
backend/python/rerankers/run.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the reranker server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/reranker.py $@
|
||||
11
backend/python/rerankers/test.sh
Executable file
11
backend/python/rerankers/test.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
|
||||
##
|
||||
## A bash script wrapper that runs the reranker server with conda
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python -m unittest $DIR/test_reranker.py
|
||||
90
backend/python/rerankers/test_reranker.py
Executable file
90
backend/python/rerankers/test_reranker.py
Executable file
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
A test script to test the gRPC service
|
||||
"""
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
|
||||
class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
TestBackendServicer is the class that tests the gRPC service
|
||||
"""
|
||||
def setUp(self):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "reranker.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
"""
|
||||
This method tears down the gRPC service by terminating the server
|
||||
"""
|
||||
self.service.kill()
|
||||
self.service.wait()
|
||||
|
||||
def test_server_startup(self):
|
||||
"""
|
||||
This method tests if the server starts up successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.Health(backend_pb2.HealthMessage())
|
||||
self.assertEqual(response.message, b'OK')
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Server failed to start")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_load_model(self):
|
||||
"""
|
||||
This method tests if the model is loaded successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
|
||||
self.assertTrue(response.success)
|
||||
self.assertEqual(response.message, "Model loaded successfully")
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("LoadModel service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_rerank(self):
|
||||
"""
|
||||
This method tests if the embeddings are generated successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
request = backend_pb2.RerankRequest(
|
||||
query="I love you",
|
||||
documents=["I hate you", "I really like you"],
|
||||
top_n=2
|
||||
)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
|
||||
self.assertTrue(response.success)
|
||||
|
||||
rerank_response = stub.Rerank(request)
|
||||
print(rerank_response.results[0])
|
||||
self.assertIsNotNone(rerank_response.results)
|
||||
self.assertEqual(len(rerank_response.results), 2)
|
||||
self.assertEqual(rerank_response.results[0].text, "I really like you")
|
||||
self.assertEqual(rerank_response.results[1].text, "I hate you")
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Reranker service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
@@ -89,8 +89,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            quantization = None

            if self.CUDA:
                if request.Device:
                    device_map=request.Device
                if request.MainGPU:
                    device_map=request.MainGPU
                else:
                    device_map="cuda:0"
                if request.Quantization == "bnb_4bit":
@@ -143,12 +143,37 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                from optimum.intel.openvino import OVModelForCausalLM
                from openvino.runtime import Core

                if "GPU" in Core().available_devices:
                    device_map="GPU"
                if request.MainGPU:
                    device_map=request.MainGPU
                else:
                    device_map="CPU"
                    device_map="AUTO"
                    devices = Core().available_devices
                    if "GPU" in " ".join(devices):
                        device_map="AUTO:GPU"

                self.model = OVModelForCausalLM.from_pretrained(model_name,
                    compile=True,
                    compile=True,
                    trust_remote_code=request.TrustRemoteCode,
                    ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT","GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"},
                    device=device_map)
                self.OV = True
            elif request.Type == "OVModelForFeatureExtraction":
                from optimum.intel.openvino import OVModelForFeatureExtraction
                from openvino.runtime import Core

                if request.MainGPU:
                    device_map=request.MainGPU
                else:
                    device_map="AUTO"
                    devices = Core().available_devices
                    if "GPU" in " ".join(devices):
                        device_map="AUTO:GPU"

                self.model = OVModelForFeatureExtraction.from_pretrained(model_name,
                    compile=True,
                    trust_remote_code=request.TrustRemoteCode,
                    ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT", "GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"},
                    export=True,
                    device=device_map)
                self.OV = True
            else:
@@ -158,6 +183,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                    quantization_config=quantization,
                    device_map=device_map,
                    torch_dtype=compute)
                if request.ContextSize > 0:
                    self.max_tokens = request.ContextSize
                else:
                    self.max_tokens = self.model.config.max_position_embeddings

                self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
                self.XPU = False

@@ -212,12 +242,27 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
        set_seed(request.Seed)
        if request.TopP == 0:
            request.TopP = 0.9

        if request.TopK == 0:
            request.TopK = 40

        prompt = request.Prompt
        if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
            prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)

        eos_token_id = self.tokenizer.eos_token_id
        if request.StopPrompts:
            eos_token_id = []
            for word in request.StopPrompts:
                eos_token_id.append(self.tokenizer.convert_tokens_to_ids(word))

        inputs = self.tokenizer(prompt, return_tensors="pt")

        max_tokens = 200
        if request.Tokens > 0:
            max_tokens = request.Tokens
        else:
            max_tokens = self.max_tokens - inputs["input_ids"].size()[inputs["input_ids"].dim()-1]

        inputs = self.tokenizer(request.Prompt, return_tensors="pt")
        if self.CUDA:
            inputs = inputs.to("cuda")
        if XPU and self.OV == False:
@@ -235,7 +280,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            top_k=request.TopK,
            do_sample=True,
            attention_mask=inputs["attention_mask"],
            eos_token_id=self.tokenizer.eos_token_id,
            eos_token_id=eos_token_id,
            pad_token_id=self.tokenizer.eos_token_id,
            streamer=streamer)
        thread=Thread(target=self.model.generate, kwargs=config)
@@ -264,7 +309,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            top_k=request.TopK,
            do_sample=True,
            attention_mask=inputs["attention_mask"],
            eos_token_id=self.tokenizer.eos_token_id,
            eos_token_id=eos_token_id,
            pad_token_id=self.tokenizer.eos_token_id)
        generated_text = self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0]

@@ -334,4 +379,4 @@ if __name__ == "__main__":
    )
    args = parser.parse_args()

    asyncio.run(serve(args.addr))
    asyncio.run(serve(args.addr))

@@ -42,7 +42,7 @@ dependencies:
  - future==0.18.3
  - gradio==3.47.1
  - gradio-client==0.6.0
  - grpcio==1.59.0
  - grpcio==1.63.0
  - h11==0.14.0
  - httpcore==0.18.0
  - httpx==0.25.0

@@ -95,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            engine_args.trust_remote_code = request.TrustRemoteCode
        if request.EnforceEager:
            engine_args.enforce_eager = request.EnforceEager
        if request.TensorParallelSize:
            engine_args.tensor_parallel_size = request.TensorParallelSize
        if request.SwapSpace != 0:
            engine_args.swap_space = request.SwapSpace
        if request.MaxModelLen != 0:
39
core/application.go
Normal file
39
core/application.go
Normal file
@@ -0,0 +1,39 @@
|
||||
package core
|
||||
|
||||
import (
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/services"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
|
||||
// Perhaps a proper DI system is worth it in the future, but for now keep things simple.
|
||||
type Application struct {
|
||||
|
||||
// Application-Level Config
|
||||
ApplicationConfig *config.ApplicationConfig
|
||||
// ApplicationState *ApplicationState
|
||||
|
||||
// Core Low-Level Services
|
||||
BackendConfigLoader *config.BackendConfigLoader
|
||||
ModelLoader *model.ModelLoader
|
||||
|
||||
// Backend Services
|
||||
// EmbeddingsBackendService *backend.EmbeddingsBackendService
|
||||
// ImageGenerationBackendService *backend.ImageGenerationBackendService
|
||||
// LLMBackendService *backend.LLMBackendService
|
||||
// TranscriptionBackendService *backend.TranscriptionBackendService
|
||||
// TextToSpeechBackendService *backend.TextToSpeechBackendService
|
||||
|
||||
// LocalAI System Services
|
||||
BackendMonitorService *services.BackendMonitorService
|
||||
GalleryService *services.GalleryService
|
||||
ListModelsService *services.ListModelsService
|
||||
LocalAIMetricsService *services.LocalAIMetricsService
|
||||
// OpenAIService *services.OpenAIService
|
||||
}
|
||||
|
||||
// TODO [NEXT PR?]: Break up ApplicationConfig.
|
||||
// Migrate over stuff that is not set via config at all - especially runtime stuff
|
||||
type ApplicationState struct {
|
||||
}
|
||||
@@ -7,7 +7,8 @@ import (

    "github.com/go-skynet/LocalAI/core/config"
    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    model "github.com/go-skynet/LocalAI/pkg/model"
    "github.com/go-skynet/LocalAI/pkg/model"
    "github.com/rs/zerolog/log"
)

func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
@@ -74,6 +75,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
        EnforceEager:       c.EnforceEager,
        SwapSpace:          int32(c.SwapSpace),
        MaxModelLen:        int32(c.MaxModelLen),
        TensorParallelSize: int32(c.TensorParallelSize),
        MMProj:             c.MMProj,
        YarnExtFactor:      c.YarnExtFactor,
        YarnAttnFactor:     c.YarnAttnFactor,
@@ -108,8 +110,12 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
    promptCachePath := ""
    if c.PromptCachePath != "" {
        p := filepath.Join(modelPath, c.PromptCachePath)
        os.MkdirAll(filepath.Dir(p), 0755)
        promptCachePath = p
        err := os.MkdirAll(filepath.Dir(p), 0750)
        if err == nil {
            promptCachePath = p
        } else {
            log.Error().Err(err).Str("promptCachePath", promptCachePath).Msg("error creating prompt cache folder")
        }
    }

    return &pb.PredictOptions{
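The recurring 0755 to 0750 change in this compare drops all permissions for "other" users on directories the server creates. Spelled out with Go's os.FileMode:

```go
package main

import (
	"fmt"
	"os"
)

func main() {
	// Octal permission literals: 0755 grants world read/execute,
	// 0750 removes all access for "other" users.
	fmt.Println(os.FileMode(0755)) // -rwxr-xr-x
	fmt.Println(os.FileMode(0750)) // -rwxr-x---
}
```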
39
core/backend/rerank.go
Normal file
39
core/backend/rerank.go
Normal file
@@ -0,0 +1,39 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
||||
bb := backend
|
||||
if bb == "" {
|
||||
return nil, fmt.Errorf("backend is required")
|
||||
}
|
||||
|
||||
grpcOpts := gRPCModelOpts(backendConfig)
|
||||
|
||||
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
||||
model.WithBackendString(bb),
|
||||
model.WithModel(modelFile),
|
||||
model.WithContext(appConfig.Context),
|
||||
model.WithAssetDir(appConfig.AssetsDestination),
|
||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
||||
})
|
||||
rerankModel, err := loader.BackendLoader(opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if rerankModel == nil {
|
||||
return nil, fmt.Errorf("could not load rerank model")
|
||||
}
|
||||
|
||||
res, err := rerankModel.Rerank(context.Background(), request)
|
||||
|
||||
return res, err
|
||||
}
|
||||
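A hypothetical caller for the helper above. The loader path and the backend and model names are placeholders, and a real caller would pass a fully configured ApplicationConfig (asset dirs, context, and so on) instead of the bare defaults used here:

```go
package main

import (
	"fmt"

	"github.com/go-skynet/LocalAI/core/backend"
	"github.com/go-skynet/LocalAI/core/config"
	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
	model "github.com/go-skynet/LocalAI/pkg/model"
)

func main() {
	appCfg := config.NewApplicationConfig() // defaults only; real callers pass options
	loader := model.NewModelLoader("./models")

	req := &proto.RerankRequest{
		Query:     "Organic skincare products for sensitive skin",
		Documents: []string{"Natural organic skincare range for sensitive skin", "Tech gadgets for smart homes"},
		TopN:      1,
	}
	// "rerankers" and "cross-encoder" match the aio rerank.yaml configs above.
	res, err := backend.Rerank("rerankers", "cross-encoder", req, loader, appCfg, config.BackendConfig{})
	if err != nil {
		panic(err)
	}
	for _, r := range res.Results {
		fmt.Printf("%d %.3f %s\n", r.Index, r.RelevanceScore, r.Text)
	}
}
```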
@@ -11,7 +11,7 @@ import (
    model "github.com/go-skynet/LocalAI/pkg/model"
)

func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.Result, error) {
func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {

    opts := modelOpts(backendConfig, appConfig, []model.Option{
        model.WithBackendString(model.WhisperBackend),

@@ -53,7 +53,7 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader,
        return "", nil, fmt.Errorf("could not load piper model")
    }

    if err := os.MkdirAll(appConfig.AudioDir, 0755); err != nil {
    if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
        return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
    }

@@ -4,7 +4,7 @@ import "embed"

type Context struct {
    Debug    bool    `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
    LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug" help:"Set the level of logs to output [${enum}]"`
    LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`

    // This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
    BackendAssets embed.FS `kong:"-"`

@@ -25,7 +25,7 @@ type ModelsInstall struct {
}

type ModelsCMD struct {
    List    ModelsList    `cmd:"" help:"List the models avaiable in your galleries" default:"withargs"`
    List    ModelsList    `cmd:"" help:"List the models available in your galleries" default:"withargs"`
    Install ModelsInstall `cmd:"" help:"Install a model from the gallery"`
}

@@ -64,7 +64,11 @@ func (mi *ModelsInstall) Run(ctx *Context) error {
        progressbar.OptionClearOnFinish(),
    )
    progressCallback := func(fileName string, current string, total string, percentage float64) {
        progressBar.Set(int(percentage * 10))
        v := int(percentage * 10)
        err := progressBar.Set(v)
        if err != nil {
            log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar")
        }
    }
    err := gallery.InstallModelFromGallery(galleries, modelName, mi.ModelsPath, gallery.GalleryModel{}, progressCallback)
    if err != nil {
@@ -2,30 +2,31 @@ package cli

import (
    "fmt"
    "os"
    "strings"
    "time"

    "github.com/go-skynet/LocalAI/core/config"
    "github.com/go-skynet/LocalAI/core/http"
    "github.com/go-skynet/LocalAI/core/startup"
    "github.com/rs/zerolog"
    "github.com/rs/zerolog/log"
)

type RunCMD struct {
    ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`

    ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
    BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
    ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
    AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
    UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
    ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
    LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
    ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
    BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
    ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
    AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
    UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
    ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
    LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
    LocalaiConfigDirPollInterval time.Duration `env:"LOCALAI_CONFIG_DIR_POLL_INTERVAL" help:"Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to an interval to poll the LocalAI Config Dir (example: 1m)" group:"storage"`
    // The alias on this option is there to preserve functionality with the old `--config-file` parameter
    ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"`

    Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"`
    Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
    AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
    RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
    PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
@@ -41,7 +42,7 @@ type RunCMD struct {
    CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
    UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
    APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
    DisableWelcome bool `env:"LOCALAI_DISABLE_WELCOME,DISABLE_WELCOME" default:"false" help:"Disable welcome pages" group:"api"`
    DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`

    ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
    SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
@@ -60,15 +61,16 @@ func (r *RunCMD) Run(ctx *Context) error {
        config.WithYAMLConfigPreload(r.PreloadModelsConfig),
        config.WithModelPath(r.ModelsPath),
        config.WithContextSize(r.ContextSize),
        config.WithDebug(*ctx.LogLevel == "debug"),
        config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel),
        config.WithImageDir(r.ImagePath),
        config.WithAudioDir(r.AudioPath),
        config.WithUploadDir(r.UploadPath),
        config.WithConfigsDir(r.ConfigPath),
        config.WithDynamicConfigDir(r.LocalaiConfigDir),
        config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
        config.WithF16(r.F16),
        config.WithStringGalleries(r.Galleries),
        config.WithModelLibraryURL(r.RemoteLibrary),
        config.WithDisableMessage(false),
        config.WithCors(r.CORS),
        config.WithCorsAllowOrigins(r.CORSAllowOrigins),
        config.WithThreads(r.Threads),
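The WithDebug change above leans on zerolog's numeric level ordering: the newly exposed trace level sits below debug, so trace runs also satisfy the debug check. A tiny sketch:

```go
package main

import (
	"fmt"

	"github.com/rs/zerolog"
)

func main() {
	// zerolog orders levels numerically: TraceLevel (-1) < DebugLevel (0) < InfoLevel (1) ...
	zerolog.SetGlobalLevel(zerolog.TraceLevel)
	debugEnabled := zerolog.GlobalLevel() <= zerolog.DebugLevel
	fmt.Println(debugEnabled) // true: trace implies debug-level verbosity as well
}
```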
@@ -82,8 +84,8 @@ func (r *RunCMD) Run(ctx *Context) error {
    idleWatchDog := r.EnableWatchdogIdle
    busyWatchDog := r.EnableWatchdogBusy

    if r.DisableWelcome {
        opts = append(opts, config.DisableWelcomePage)
    if r.DisableWebUI {
        opts = append(opts, config.DisableWebUI)
    }

    if idleWatchDog || busyWatchDog {
@@ -129,22 +131,10 @@ func (r *RunCMD) Run(ctx *Context) error {
    }

    cl, ml, options, err := startup.Startup(opts...)

    if err != nil {
        return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
    }

    // Watch the configuration directory
    // If the directory does not exist, we don't watch it
    if _, err := os.Stat(r.LocalaiConfigDir); err == nil {
        closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options)
        defer closeConfigWatcherFn()

        if err != nil {
            return fmt.Errorf("failed while watching configuration directory %s", r.LocalaiConfigDir)
        }
    }

    appHTTP, err := http.App(cl, ml, options)
    if err != nil {
        log.Error().Err(err).Msg("error during HTTP App construction")

@@ -8,6 +8,7 @@ import (
    "github.com/go-skynet/LocalAI/core/backend"
    "github.com/go-skynet/LocalAI/core/config"
    "github.com/go-skynet/LocalAI/pkg/model"
    "github.com/rs/zerolog/log"
)

type TranscriptCMD struct {
@@ -41,7 +42,12 @@ func (t *TranscriptCMD) Run(ctx *Context) error {

    c.Threads = &t.Threads

    defer ml.StopAllGRPC()
    defer func() {
        err := ml.StopAllGRPC()
        if err != nil {
            log.Error().Err(err).Msg("unable to stop all grpc processes")
        }
    }()

    tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts)
    if err != nil {

@@ -10,6 +10,7 @@ import (
    "github.com/go-skynet/LocalAI/core/backend"
    "github.com/go-skynet/LocalAI/core/config"
    "github.com/go-skynet/LocalAI/pkg/model"
    "github.com/rs/zerolog/log"
)

type TTSCMD struct {
@@ -40,7 +41,12 @@ func (t *TTSCMD) Run(ctx *Context) error {
    }
    ml := model.NewModelLoader(opts.ModelPath)

    defer ml.StopAllGRPC()
    defer func() {
        err := ml.StopAllGRPC()
        if err != nil {
            log.Error().Err(err).Msg("unable to stop all grpc processes")
        }
    }()

    options := config.BackendConfig{}
    options.SetDefaults()
@@ -15,13 +15,15 @@ type ApplicationConfig struct {
    ConfigFile string
    ModelPath string
    UploadLimitMB, Threads, ContextSize int
    DisableWelcomePage bool
    DisableWebUI bool
    F16 bool
    Debug, DisableMessage bool
    Debug bool
    ImageDir string
    AudioDir string
    UploadDir string
    ConfigsDir string
    DynamicConfigsDir string
    DynamicConfigsDirPollInterval time.Duration
    CORS bool
    PreloadJSONModels string
    PreloadModelsFromPath string
@@ -55,12 +57,11 @@ type AppOption func(*ApplicationConfig)

func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
    opt := &ApplicationConfig{
        Context:        context.Background(),
        UploadLimitMB:  15,
        Threads:        1,
        ContextSize:    512,
        Debug:          true,
        DisableMessage: true,
        Context:       context.Background(),
        UploadLimitMB: 15,
        Threads:       1,
        ContextSize:   512,
        Debug:         true,
    }
    for _, oo := range o {
        oo(opt)
@@ -106,8 +107,8 @@ var EnableWatchDogBusyCheck = func(o *ApplicationConfig) {
    o.WatchDogBusy = true
}

var DisableWelcomePage = func(o *ApplicationConfig) {
    o.DisableWelcomePage = true
var DisableWebUI = func(o *ApplicationConfig) {
    o.DisableWebUI = true
}

func SetWatchDogBusyTimeout(t time.Duration) AppOption {
@@ -234,12 +235,6 @@ func WithDebug(debug bool) AppOption {
    }
}

func WithDisableMessage(disableMessage bool) AppOption {
    return func(o *ApplicationConfig) {
        o.DisableMessage = disableMessage
    }
}

func WithAudioDir(audioDir string) AppOption {
    return func(o *ApplicationConfig) {
        o.AudioDir = audioDir
@@ -264,6 +259,18 @@ func WithConfigsDir(configsDir string) AppOption {
    }
}

func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
    return func(o *ApplicationConfig) {
        o.DynamicConfigsDir = dynamicConfigsDir
    }
}

func WithDynamicConfigDirPollInterval(interval time.Duration) AppOption {
    return func(o *ApplicationConfig) {
        o.DynamicConfigsDirPollInterval = interval
    }
}

func WithApiKeys(apiKeys []string) AppOption {
    return func(o *ApplicationConfig) {
        o.ApiKeys = apiKeys
@@ -1,22 +1,12 @@
package config

import (
    "errors"
    "fmt"
    "io/fs"
    "os"
    "path/filepath"
    "sort"
    "strings"
    "sync"

    "github.com/go-skynet/LocalAI/core/schema"
    "github.com/go-skynet/LocalAI/pkg/downloader"
    "github.com/go-skynet/LocalAI/pkg/functions"
    "github.com/go-skynet/LocalAI/pkg/utils"
    "github.com/rs/zerolog/log"
    "gopkg.in/yaml.v3"

    "github.com/charmbracelet/glamour"
)

const (

@@ -39,7 +29,7 @@ type BackendConfig struct {
    InputToken [][]int `yaml:"-"`
    functionCallString, functionCallNameString string `yaml:"-"`

    FunctionsConfig Functions `yaml:"function"`
    FunctionsConfig functions.FunctionsConfig `yaml:"function"`

    FeatureFlag FeatureFlag `yaml:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
    // LLM configs (GPT4ALL, Llama.cpp, ...)

@@ -139,6 +129,7 @@ type LLMConfig struct {
    EnforceEager bool `yaml:"enforce_eager"` // vLLM
    SwapSpace int `yaml:"swap_space"` // vLLM
    MaxModelLen int `yaml:"max_model_len"` // vLLM
    TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
    MMProj string `yaml:"mmproj"`

    RopeScaling string `yaml:"rope_scaling"`

@@ -157,13 +148,6 @@ type AutoGPTQ struct {
    UseFastTokenizer bool `yaml:"use_fast_tokenizer"`
}

type Functions struct {
    DisableNoAction bool `yaml:"disable_no_action"`
    NoActionFunctionName string `yaml:"no_action_function_name"`
    NoActionDescriptionName string `yaml:"no_action_description_name"`
    ParallelCalls bool `yaml:"parallel_calls"`
}

type TemplateConfig struct {
    Chat string `yaml:"chat"`
    ChatMessage string `yaml:"chat_message"`

@@ -189,6 +173,36 @@ func (c *BackendConfig) ShouldCallSpecificFunction() bool {
    return len(c.functionCallNameString) > 0
}

// MMProjFileName returns the filename of the MMProj file
// If the MMProj is a URL, it will return the MD5 of the URL which is the filename
func (c *BackendConfig) MMProjFileName() string {
    modelURL := downloader.ConvertURL(c.MMProj)
    if downloader.LooksLikeURL(modelURL) {
        return utils.MD5(modelURL)
    }

    return c.MMProj
}

func (c *BackendConfig) IsMMProjURL() bool {
    return downloader.LooksLikeURL(downloader.ConvertURL(c.MMProj))
}

func (c *BackendConfig) IsModelURL() bool {
    return downloader.LooksLikeURL(downloader.ConvertURL(c.Model))
}

// ModelFileName returns the filename of the model
// If the model is a URL, it will return the MD5 of the URL which is the filename
func (c *BackendConfig) ModelFileName() string {
    modelURL := downloader.ConvertURL(c.Model)
    if downloader.LooksLikeURL(modelURL) {
        return utils.MD5(modelURL)
    }

    return c.Model
}
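
ModelFileName and MMProjFileName above map a remote model reference to a deterministic local filename: when the (converted) URI looks like a URL, the cached file is named after the MD5 of that URL; otherwise the value is used as-is. A small standalone sketch of the idea, assuming utils.MD5 returns the hex-encoded MD5 digest of its input and using a simplified stand-in for downloader.LooksLikeURL (both are assumptions; only the calls are visible in this hunk):

package main

import (
    "crypto/md5"
    "encoding/hex"
    "fmt"
    "strings"
)

// md5Hex mirrors what utils.MD5 is assumed to do: hex MD5 of the input string.
func md5Hex(s string) string {
    sum := md5.Sum([]byte(s))
    return hex.EncodeToString(sum[:])
}

// localFileName sketches ModelFileName: URLs are cached under their MD5,
// plain filenames pass through unchanged.
func localFileName(model string) string {
    if strings.HasPrefix(model, "http://") || strings.HasPrefix(model, "https://") {
        return md5Hex(model)
    }
    return model
}

func main() {
    fmt.Println(localFileName("https://example.com/model.gguf")) // md5 of the URL
    fmt.Println(localFileName("model.gguf"))                     // unchanged
}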

func (c *BackendConfig) FunctionToCall() string {
    if c.functionCallNameString != "" &&
        c.functionCallNameString != "none" && c.functionCallNameString != "auto" {

@@ -210,15 +224,15 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
    defaultTopP := 0.95
    defaultTopK := 40
    defaultTemp := 0.9
    defaultMaxTokens := 2048
    defaultMirostat := 2
    defaultMirostatTAU := 5.0
    defaultMirostatETA := 0.1
    defaultTypicalP := 1.0
    defaultTFZ := 1.0
    defaultZero := 0

    // Try to offload all GPU layers (if GPU is found)
    defaultNGPULayers := 99999999
    defaultHigh := 99999999

    trueV := true
    falseV := false

@@ -243,7 +257,13 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {

    if cfg.MMap == nil {
        // MMap is enabled by default
        cfg.MMap = &trueV

        // Only exception is for Intel GPUs
        if os.Getenv("XPU") != "" {
            cfg.MMap = &falseV
        } else {
            cfg.MMap = &trueV
        }
    }

    if cfg.MMlock == nil {

@@ -259,7 +279,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
    }

    if cfg.Maxtokens == nil {
        cfg.Maxtokens = &defaultMaxTokens
        cfg.Maxtokens = &defaultZero
    }

    if cfg.Mirostat == nil {

@@ -274,7 +294,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
        cfg.MirostatTAU = &defaultMirostatTAU
    }
    if cfg.NGPULayers == nil {
        cfg.NGPULayers = &defaultNGPULayers
        cfg.NGPULayers = &defaultHigh
    }

    if cfg.LowVRAM == nil {

@@ -312,287 +332,3 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
        cfg.Debug = &trueV
    }
}

////// Config Loader ////////

type BackendConfigLoader struct {
    configs map[string]BackendConfig
    sync.Mutex
}

type LoadOptions struct {
    debug bool
    threads, ctxSize int
    f16 bool
}

func LoadOptionDebug(debug bool) ConfigLoaderOption {
    return func(o *LoadOptions) {
        o.debug = debug
    }
}

func LoadOptionThreads(threads int) ConfigLoaderOption {
    return func(o *LoadOptions) {
        o.threads = threads
    }
}

func LoadOptionContextSize(ctxSize int) ConfigLoaderOption {
    return func(o *LoadOptions) {
        o.ctxSize = ctxSize
    }
}

func LoadOptionF16(f16 bool) ConfigLoaderOption {
    return func(o *LoadOptions) {
        o.f16 = f16
    }
}

type ConfigLoaderOption func(*LoadOptions)

func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
    for _, l := range options {
        l(lo)
    }
}

// Load a config file for a model
func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {

    // Load a config file if present after the model name
    cfg := &BackendConfig{
        PredictionOptions: schema.PredictionOptions{
            Model: modelName,
        },
    }

    cfgExisting, exists := cl.GetBackendConfig(modelName)
    if exists {
        cfg = &cfgExisting
    } else {
        // Try loading a model config file
        modelConfig := filepath.Join(modelPath, modelName+".yaml")
        if _, err := os.Stat(modelConfig); err == nil {
            if err := cl.LoadBackendConfig(
                modelConfig, opts...,
            ); err != nil {
                return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
            }
            cfgExisting, exists = cl.GetBackendConfig(modelName)
            if exists {
                cfg = &cfgExisting
            }
        }
    }

    cfg.SetDefaults(opts...)

    return cfg, nil
}

func NewBackendConfigLoader() *BackendConfigLoader {
    return &BackendConfigLoader{
        configs: make(map[string]BackendConfig),
    }
}
func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
    c := &[]*BackendConfig{}
    f, err := os.ReadFile(file)
    if err != nil {
        return nil, fmt.Errorf("cannot read config file: %w", err)
    }
    if err := yaml.Unmarshal(f, c); err != nil {
        return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
    }

    for _, cc := range *c {
        cc.SetDefaults(opts...)
    }

    return *c, nil
}

func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
    lo := &LoadOptions{}
    lo.Apply(opts...)

    c := &BackendConfig{}
    f, err := os.ReadFile(file)
    if err != nil {
        return nil, fmt.Errorf("cannot read config file: %w", err)
    }
    if err := yaml.Unmarshal(f, c); err != nil {
        return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
    }

    c.SetDefaults(opts...)
    return c, nil
}

func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error {
    cm.Lock()
    defer cm.Unlock()
    c, err := ReadBackendConfigFile(file, opts...)
    if err != nil {
        return fmt.Errorf("cannot load config file: %w", err)
    }

    for _, cc := range c {
        cm.configs[cc.Name] = *cc
    }
    return nil
}

func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error {
    cl.Lock()
    defer cl.Unlock()
    c, err := ReadBackendConfig(file, opts...)
    if err != nil {
        return fmt.Errorf("cannot read config file: %w", err)
    }

    cl.configs[c.Name] = *c
    return nil
}

func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
    cl.Lock()
    defer cl.Unlock()
    v, exists := cl.configs[m]
    return v, exists
}

func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
    cl.Lock()
    defer cl.Unlock()
    var res []BackendConfig
    for _, v := range cl.configs {
        res = append(res, v)
    }

    sort.SliceStable(res, func(i, j int) bool {
        return res[i].Name < res[j].Name
    })

    return res
}

func (cl *BackendConfigLoader) ListBackendConfigs() []string {
    cl.Lock()
    defer cl.Unlock()
    var res []string
    for k := range cl.configs {
        res = append(res, k)
    }
    return res
}

// Preload prepare models if they are not local but url or huggingface repositories
func (cl *BackendConfigLoader) Preload(modelPath string) error {
    cl.Lock()
    defer cl.Unlock()

    status := func(fileName, current, total string, percent float64) {
        utils.DisplayDownloadFunction(fileName, current, total, percent)
    }

    log.Info().Msgf("Preloading models from %s", modelPath)

    renderMode := "dark"
    if os.Getenv("COLOR") != "" {
        renderMode = os.Getenv("COLOR")
    }

    glamText := func(t string) {
        out, err := glamour.Render(t, renderMode)
        if err == nil && os.Getenv("NO_COLOR") == "" {
            fmt.Println(out)
        } else {
            fmt.Println(t)
        }
    }

    for i, config := range cl.configs {

        // Download files and verify their SHA
        for _, file := range config.DownloadFiles {
            log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)

            if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
                return err
            }
            // Create file path
            filePath := filepath.Join(modelPath, file.Filename)

            if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
                return err
            }
        }

        modelURL := config.PredictionOptions.Model
        modelURL = downloader.ConvertURL(modelURL)

        if downloader.LooksLikeURL(modelURL) {
            // md5 of model name
            md5Name := utils.MD5(modelURL)

            // check if file exists
            if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
                err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
                if err != nil {
                    return err
                }
            }

            cc := cl.configs[i]
            c := &cc
            c.PredictionOptions.Model = md5Name
            cl.configs[i] = *c
        }
        if cl.configs[i].Name != "" {
            glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name))
        }
        if cl.configs[i].Description != "" {
            //glamText("**Description**")
            glamText(cl.configs[i].Description)
        }
        if cl.configs[i].Usage != "" {
            //glamText("**Usage**")
            glamText(cl.configs[i].Usage)
        }
    }
    return nil
}

// LoadBackendConfigsFromPath reads all the configurations of the models from a path
// (non-recursive)
func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
    cm.Lock()
    defer cm.Unlock()
    entries, err := os.ReadDir(path)
    if err != nil {
        return err
    }
    files := make([]fs.FileInfo, 0, len(entries))
    for _, entry := range entries {
        info, err := entry.Info()
        if err != nil {
            return err
        }
        files = append(files, info)
    }
    for _, file := range files {
        // Skip templates, YAML and .keep files
        if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
            continue
        }
        c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...)
        if err == nil {
            cm.configs[c.Name] = *c
        }
    }

    return nil
}
317 core/config/backend_config_loader.go Normal file
@@ -0,0 +1,317 @@
package config

import (
    "errors"
    "fmt"
    "io/fs"
    "os"
    "path/filepath"
    "sort"
    "strings"
    "sync"

    "github.com/charmbracelet/glamour"
    "github.com/go-skynet/LocalAI/core/schema"
    "github.com/go-skynet/LocalAI/pkg/downloader"
    "github.com/go-skynet/LocalAI/pkg/utils"
    "github.com/rs/zerolog/log"
    "gopkg.in/yaml.v3"
)

type BackendConfigLoader struct {
    configs map[string]BackendConfig
    sync.Mutex
}

type LoadOptions struct {
    debug bool
    threads, ctxSize int
    f16 bool
}

func LoadOptionDebug(debug bool) ConfigLoaderOption {
    return func(o *LoadOptions) {
        o.debug = debug
    }
}

func LoadOptionThreads(threads int) ConfigLoaderOption {
    return func(o *LoadOptions) {
        o.threads = threads
    }
}

func LoadOptionContextSize(ctxSize int) ConfigLoaderOption {
    return func(o *LoadOptions) {
        o.ctxSize = ctxSize
    }
}

func LoadOptionF16(f16 bool) ConfigLoaderOption {
    return func(o *LoadOptions) {
        o.f16 = f16
    }
}

type ConfigLoaderOption func(*LoadOptions)

func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
    for _, l := range options {
        l(lo)
    }
}

// Load a config file for a model
func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {

    // Load a config file if present after the model name
    cfg := &BackendConfig{
        PredictionOptions: schema.PredictionOptions{
            Model: modelName,
        },
    }

    cfgExisting, exists := cl.GetBackendConfig(modelName)
    if exists {
        cfg = &cfgExisting
    } else {
        // Try loading a model config file
        modelConfig := filepath.Join(modelPath, modelName+".yaml")
        if _, err := os.Stat(modelConfig); err == nil {
            if err := cl.LoadBackendConfig(
                modelConfig, opts...,
            ); err != nil {
                return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
            }
            cfgExisting, exists = cl.GetBackendConfig(modelName)
            if exists {
                cfg = &cfgExisting
            }
        }
    }

    cfg.SetDefaults(opts...)

    return cfg, nil
}

func NewBackendConfigLoader() *BackendConfigLoader {
    return &BackendConfigLoader{
        configs: make(map[string]BackendConfig),
    }
}
func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
    c := &[]*BackendConfig{}
    f, err := os.ReadFile(file)
    if err != nil {
        return nil, fmt.Errorf("cannot read config file: %w", err)
    }
    if err := yaml.Unmarshal(f, c); err != nil {
        return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
    }

    for _, cc := range *c {
        cc.SetDefaults(opts...)
    }

    return *c, nil
}

func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
    lo := &LoadOptions{}
    lo.Apply(opts...)

    c := &BackendConfig{}
    f, err := os.ReadFile(file)
    if err != nil {
        return nil, fmt.Errorf("cannot read config file: %w", err)
    }
    if err := yaml.Unmarshal(f, c); err != nil {
        return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
    }

    c.SetDefaults(opts...)
    return c, nil
}

func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error {
    cm.Lock()
    defer cm.Unlock()
    c, err := ReadBackendConfigFile(file, opts...)
    if err != nil {
        return fmt.Errorf("cannot load config file: %w", err)
    }

    for _, cc := range c {
        cm.configs[cc.Name] = *cc
    }
    return nil
}

func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error {
    cl.Lock()
    defer cl.Unlock()
    c, err := ReadBackendConfig(file, opts...)
    if err != nil {
        return fmt.Errorf("cannot read config file: %w", err)
    }

    cl.configs[c.Name] = *c
    return nil
}

func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
    cl.Lock()
    defer cl.Unlock()
    v, exists := cl.configs[m]
    return v, exists
}

func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
    cl.Lock()
    defer cl.Unlock()
    var res []BackendConfig
    for _, v := range cl.configs {
        res = append(res, v)
    }

    sort.SliceStable(res, func(i, j int) bool {
        return res[i].Name < res[j].Name
    })

    return res
}

func (cl *BackendConfigLoader) ListBackendConfigs() []string {
    cl.Lock()
    defer cl.Unlock()
    var res []string
    for k := range cl.configs {
        res = append(res, k)
    }
    return res
}

// Preload prepare models if they are not local but url or huggingface repositories
func (cl *BackendConfigLoader) Preload(modelPath string) error {
    cl.Lock()
    defer cl.Unlock()

    status := func(fileName, current, total string, percent float64) {
        utils.DisplayDownloadFunction(fileName, current, total, percent)
    }

    log.Info().Msgf("Preloading models from %s", modelPath)

    renderMode := "dark"
    if os.Getenv("COLOR") != "" {
        renderMode = os.Getenv("COLOR")
    }

    glamText := func(t string) {
        out, err := glamour.Render(t, renderMode)
        if err == nil && os.Getenv("NO_COLOR") == "" {
            fmt.Println(out)
        } else {
            fmt.Println(t)
        }
    }

    for i, config := range cl.configs {

        // Download files and verify their SHA
        for i, file := range config.DownloadFiles {
            log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)

            if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
                return err
            }
            // Create file path
            filePath := filepath.Join(modelPath, file.Filename)

            if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil {
                return err
            }
        }

        // If the model is an URL, expand it, and download the file
        if config.IsModelURL() {
            modelFileName := config.ModelFileName()
            modelURL := downloader.ConvertURL(config.Model)
            // check if file exists
            if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) {
                err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status)
                if err != nil {
                    return err
                }
            }

            cc := cl.configs[i]
            c := &cc
            c.PredictionOptions.Model = modelFileName
            cl.configs[i] = *c
        }

        if config.IsMMProjURL() {
            modelFileName := config.MMProjFileName()
            modelURL := downloader.ConvertURL(config.MMProj)
            // check if file exists
            if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) {
                err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status)
                if err != nil {
                    return err
                }
            }

            cc := cl.configs[i]
            c := &cc
            c.MMProj = modelFileName
            cl.configs[i] = *c
        }

        if cl.configs[i].Name != "" {
            glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name))
        }
        if cl.configs[i].Description != "" {
            //glamText("**Description**")
            glamText(cl.configs[i].Description)
        }
        if cl.configs[i].Usage != "" {
            //glamText("**Usage**")
            glamText(cl.configs[i].Usage)
        }
    }
    return nil
}

// LoadBackendConfigsFromPath reads all the configurations of the models from a path
// (non-recursive)
func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
    cm.Lock()
    defer cm.Unlock()
    entries, err := os.ReadDir(path)
    if err != nil {
        return err
    }
    files := make([]fs.FileInfo, 0, len(entries))
    for _, entry := range entries {
        info, err := entry.Info()
        if err != nil {
            return err
        }
        files = append(files, info)
    }
    for _, file := range files {
        // Skip templates, YAML and .keep files
        if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") ||
            strings.HasPrefix(file.Name(), ".") {
            continue
        }
        c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...)
        if err == nil {
            cm.configs[c.Name] = *c
        }
    }

    return nil
}
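
Taken together, the new file gives the loader a small, self-contained API: construct with NewBackendConfigLoader, populate it from YAML files on disk, optionally Preload remote models, then query by name. A minimal usage sketch built only from the functions defined above (the /models path is illustrative):

package main

import (
    "fmt"

    "github.com/go-skynet/LocalAI/core/config"
)

func main() {
    cl := config.NewBackendConfigLoader()

    // Read every *.yaml / *.yml model config in the models directory
    // (non-recursive; hidden files are skipped, as implemented above).
    if err := cl.LoadBackendConfigsFromPath("/models",
        config.LoadOptionDebug(true),
        config.LoadOptionContextSize(512),
    ); err != nil {
        panic(err)
    }

    // Download any URL-backed model or mmproj files the configs reference.
    if err := cl.Preload("/models"); err != nil {
        panic(err)
    }

    for _, name := range cl.ListBackendConfigs() {
        if cfg, ok := cl.GetBackendConfig(name); ok {
            fmt.Println(name, "->", cfg.ModelFileName())
        }
    }
}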
309 core/http/api.go
@@ -1,309 +0,0 @@
package http

import (
    "encoding/json"
    "errors"
    "os"
    "strings"

    "github.com/go-skynet/LocalAI/pkg/utils"
    "github.com/gofiber/swagger" // swagger handler

    "github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
    "github.com/go-skynet/LocalAI/core/http/endpoints/localai"
    "github.com/go-skynet/LocalAI/core/http/endpoints/openai"

    "github.com/go-skynet/LocalAI/core/config"
    "github.com/go-skynet/LocalAI/core/schema"
    "github.com/go-skynet/LocalAI/core/services"
    "github.com/go-skynet/LocalAI/internal"
    "github.com/go-skynet/LocalAI/pkg/model"

    "github.com/gofiber/fiber/v2"
    "github.com/gofiber/fiber/v2/middleware/cors"
    "github.com/gofiber/fiber/v2/middleware/logger"
    "github.com/gofiber/fiber/v2/middleware/recover"
)

func readAuthHeader(c *fiber.Ctx) string {
    authHeader := c.Get("Authorization")

    // elevenlabs
    xApiKey := c.Get("xi-api-key")
    if xApiKey != "" {
        authHeader = "Bearer " + xApiKey
    }

    // anthropic
    xApiKey = c.Get("x-api-key")
    if xApiKey != "" {
        authHeader = "Bearer " + xApiKey
    }

    return authHeader
}

// @title LocalAI API
// @version 2.0.0
// @description The LocalAI Rest API.
// @termsOfService
// @contact.name LocalAI
// @contact.url https://localai.io
// @license.name MIT
// @license.url https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
// @BasePath /
// @securityDefinitions.apikey BearerAuth
// @in header
// @name Authorization

func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
    // Return errors as JSON responses
    app := fiber.New(fiber.Config{
        Views: renderEngine(),
        BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
        DisableStartupMessage: appConfig.DisableMessage,
        // Override default error handler
        ErrorHandler: func(ctx *fiber.Ctx, err error) error {
            // Status code defaults to 500
            code := fiber.StatusInternalServerError

            // Retrieve the custom status code if it's a *fiber.Error
            var e *fiber.Error
            if errors.As(err, &e) {
                code = e.Code
            }

            // Send custom error page
            return ctx.Status(code).JSON(
                schema.ErrorResponse{
                    Error: &schema.APIError{Message: err.Error(), Code: code},
                },
            )
        },
    })

    if appConfig.Debug {
        app.Use(logger.New(logger.Config{
            Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
        }))
    }

    // Default middleware config

    if !appConfig.Debug {
        app.Use(recover.New())
    }

    metricsService, err := services.NewLocalAIMetricsService()
    if err != nil {
        return nil, err
    }

    if metricsService != nil {
        app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
        app.Hooks().OnShutdown(func() error {
            return metricsService.Shutdown()
        })
    }

    // Auth middleware checking if API key is valid. If no API key is set, no auth is required.
    auth := func(c *fiber.Ctx) error {
        if len(appConfig.ApiKeys) == 0 {
            return c.Next()
        }

        // Check for api_keys.json file
        fileContent, err := os.ReadFile("api_keys.json")
        if err == nil {
            // Parse JSON content from the file
            var fileKeys []string
            err := json.Unmarshal(fileContent, &fileKeys)
            if err != nil {
                return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
            }

            // Add file keys to options.ApiKeys
            appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
        }

        if len(appConfig.ApiKeys) == 0 {
            return c.Next()
        }

        authHeader := readAuthHeader(c)
        if authHeader == "" {
            return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
        }

        // If it's a bearer token
        authHeaderParts := strings.Split(authHeader, " ")
        if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
            return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
        }

        apiKey := authHeaderParts[1]
        for _, key := range appConfig.ApiKeys {
            if apiKey == key {
                return c.Next()
            }
        }

        return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
    }

    if appConfig.CORS {
        var c func(ctx *fiber.Ctx) error
        if appConfig.CORSAllowOrigins == "" {
            c = cors.New()
        } else {
            c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
        }

        app.Use(c)
    }

    // LocalAI API endpoints
    galleryService := services.NewGalleryService(appConfig.ModelPath)
    galleryService.Start(appConfig.Context, cl)

    app.Get("/version", auth, func(c *fiber.Ctx) error {
        return c.JSON(struct {
            Version string `json:"version"`
        }{Version: internal.PrintableVersion()})
    })

    // Make sure directories exists
    os.MkdirAll(appConfig.ImageDir, 0755)
    os.MkdirAll(appConfig.AudioDir, 0755)
    os.MkdirAll(appConfig.UploadDir, 0755)
    os.MkdirAll(appConfig.ConfigsDir, 0755)
    os.MkdirAll(appConfig.ModelPath, 0755)

    // Load config jsons
    utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
    utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
    utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)

    app.Get("/swagger/*", swagger.HandlerDefault) // default

    welcomeRoute(
        app,
        cl,
        ml,
        appConfig,
        auth,
    )

    modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
    app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
    app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
    app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
    app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
    app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
    app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
    app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())

    app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))

    // Elevenlabs
    app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))

    // Stores
    sl := model.NewModelLoader("")
    app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
    app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
    app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
    app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))

    // openAI compatible API endpoint

    // chat
    app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
    app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))

    // edit
    app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
    app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))

    // assistant
    app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
    app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
    app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
    app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
    app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
    app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
    app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
    app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
    app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
    app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
    app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
    app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
    app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
    app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
    app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
    app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
    app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
    app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))

    // files
    app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
    app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
    app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
    app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
    app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
    app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
    app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
    app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
    app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
    app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))

    // completion
    app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
    app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
    app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))

    // embeddings
    app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
    app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
    app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))

    // audio
    app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
    app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))

    // images
    app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))

    if appConfig.ImageDir != "" {
        app.Static("/generated-images", appConfig.ImageDir)
    }

    if appConfig.AudioDir != "" {
        app.Static("/generated-audio", appConfig.AudioDir)
    }

    ok := func(c *fiber.Ctx) error {
        return c.SendStatus(200)
    }

    // Kubernetes health checks
    app.Get("/healthz", ok)
    app.Get("/readyz", ok)

    // Experimental Backend Statistics Module
    backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now
    app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor))
    app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor))

    // models
    app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
    app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))

    app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())

    // Define a custom 404 handler
    // Note: keep this at the bottom!
    app.Use(notFoundHandler)

    return app, nil
}
196 core/http/app.go Normal file
@@ -0,0 +1,196 @@
package http

import (
    "embed"
    "errors"
    "net/http"
    "strings"

    "github.com/go-skynet/LocalAI/pkg/utils"

    "github.com/go-skynet/LocalAI/core/http/endpoints/localai"
    "github.com/go-skynet/LocalAI/core/http/endpoints/openai"
    "github.com/go-skynet/LocalAI/core/http/routes"

    "github.com/go-skynet/LocalAI/core/config"
    "github.com/go-skynet/LocalAI/core/schema"
    "github.com/go-skynet/LocalAI/core/services"
    "github.com/go-skynet/LocalAI/pkg/model"

    "github.com/gofiber/contrib/fiberzerolog"
    "github.com/gofiber/fiber/v2"
    "github.com/gofiber/fiber/v2/middleware/cors"
    "github.com/gofiber/fiber/v2/middleware/filesystem"
    "github.com/gofiber/fiber/v2/middleware/recover"

    // swagger handler
    "github.com/rs/zerolog/log"
)

func readAuthHeader(c *fiber.Ctx) string {
    authHeader := c.Get("Authorization")

    // elevenlabs
    xApiKey := c.Get("xi-api-key")
    if xApiKey != "" {
        authHeader = "Bearer " + xApiKey
    }

    // anthropic
    xApiKey = c.Get("x-api-key")
    if xApiKey != "" {
        authHeader = "Bearer " + xApiKey
    }

    return authHeader
}

// Embed a directory
//
//go:embed static/*
var embedDirStatic embed.FS

// @title LocalAI API
// @version 2.0.0
// @description The LocalAI Rest API.
// @termsOfService
// @contact.name LocalAI
// @contact.url https://localai.io
// @license.name MIT
// @license.url https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
// @BasePath /
// @securityDefinitions.apikey BearerAuth
// @in header
// @name Authorization

func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
    // Return errors as JSON responses
    app := fiber.New(fiber.Config{
        Views: renderEngine(),
        BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
        // We disable the Fiber startup message as it does not conform to structured logging.
        // We register a startup log line with connection information in the OnListen hook to keep things user friendly though
        DisableStartupMessage: true,
        // Override default error handler
        ErrorHandler: func(ctx *fiber.Ctx, err error) error {
            // Status code defaults to 500
            code := fiber.StatusInternalServerError

            // Retrieve the custom status code if it's a *fiber.Error
            var e *fiber.Error
            if errors.As(err, &e) {
                code = e.Code
            }

            // Send custom error page
            return ctx.Status(code).JSON(
                schema.ErrorResponse{
                    Error: &schema.APIError{Message: err.Error(), Code: code},
                },
            )
        },
    })

    app.Hooks().OnListen(func(listenData fiber.ListenData) error {
        scheme := "http"
        if listenData.TLS {
            scheme = "https"
        }
        log.Info().Str("endpoint", scheme+"://"+listenData.Host+":"+listenData.Port).Msg("LocalAI API is listening! Please connect to the endpoint for API documentation.")
        return nil
    })

    // Have Fiber use zerolog like the rest of the application rather than it's built-in logger
    logger := log.Logger
    app.Use(fiberzerolog.New(fiberzerolog.Config{
        Logger: &logger,
    }))

    // Default middleware config

    if !appConfig.Debug {
        app.Use(recover.New())
    }

    metricsService, err := services.NewLocalAIMetricsService()
    if err != nil {
        return nil, err
    }

    if metricsService != nil {
        app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
        app.Hooks().OnShutdown(func() error {
            return metricsService.Shutdown()
        })
    }

    // Auth middleware checking if API key is valid. If no API key is set, no auth is required.
    auth := func(c *fiber.Ctx) error {
        if len(appConfig.ApiKeys) == 0 {
            return c.Next()
        }

        if len(appConfig.ApiKeys) == 0 {
            return c.Next()
        }

        authHeader := readAuthHeader(c)
        if authHeader == "" {
            return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
        }

        // If it's a bearer token
        authHeaderParts := strings.Split(authHeader, " ")
        if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
            return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
        }

        apiKey := authHeaderParts[1]
        for _, key := range appConfig.ApiKeys {
            if apiKey == key {
                return c.Next()
            }
        }

        return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
    }

    if appConfig.CORS {
        var c func(ctx *fiber.Ctx) error
        if appConfig.CORSAllowOrigins == "" {
            c = cors.New()
        } else {
            c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
        }

        app.Use(c)
    }

    // Load config jsons
    utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
    utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
    utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)

    galleryService := services.NewGalleryService(appConfig.ModelPath)
    galleryService.Start(appConfig.Context, cl)

    routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth)
    routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth)
    routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth)
    if !appConfig.DisableWebUI {
        routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth)
    }
    routes.RegisterJINARoutes(app, cl, ml, appConfig, auth)

    app.Use("/static", filesystem.New(filesystem.Config{
        Root: http.FS(embedDirStatic),
        PathPrefix: "static",
        Browse: true,
    }))

    // Define a custom 404 handler
    // Note: keep this at the bottom!
    app.Use(notFoundHandler)

    return app, nil
}
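
The auth middleware above accepts a standard Bearer token, and readAuthHeader additionally rewrites ElevenLabs-style (xi-api-key) and Anthropic-style (x-api-key) headers into the Bearer form before validation. A client-side sketch against a confirmed route (/v1/models appears in api.go above); the host, port, and key are placeholders:

package main

import (
    "fmt"
    "io"
    "net/http"
)

func main() {
    req, err := http.NewRequest(http.MethodGet, "http://localhost:8080/v1/models", nil)
    if err != nil {
        panic(err)
    }
    // Equivalent alternatives accepted by readAuthHeader:
    //   req.Header.Set("xi-api-key", "my-api-key")  // elevenlabs style
    //   req.Header.Set("x-api-key", "my-api-key")   // anthropic style
    req.Header.Set("Authorization", "Bearer my-api-key")

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    body, _ := io.ReadAll(resp.Body)
    fmt.Println(resp.Status, string(body))
}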
@@ -211,7 +211,6 @@ var _ = Describe("API test", func() {

    commonOpts := []config.AppOption{
        config.WithDebug(true),
        config.WithDisableMessage(true),
    }

    Context("API with ephemeral models", func() {

@@ -223,7 +222,7 @@ var _ = Describe("API test", func() {

    modelDir = filepath.Join(tmpdir, "models")
    backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
    err = os.Mkdir(backendAssetsDir, 0755)
    err = os.Mkdir(backendAssetsDir, 0750)
    Expect(err).ToNot(HaveOccurred())

    c, cancel = context.WithCancel(context.Background())

@@ -242,7 +241,7 @@ var _ = Describe("API test", func() {
    }
    out, err := yaml.Marshal(g)
    Expect(err).ToNot(HaveOccurred())
    err = os.WriteFile(filepath.Join(tmpdir, "gallery_simple.yaml"), out, 0644)
    err = os.WriteFile(filepath.Join(tmpdir, "gallery_simple.yaml"), out, 0600)
    Expect(err).ToNot(HaveOccurred())

    galleries := []gallery.Gallery{

@@ -490,11 +489,10 @@ var _ = Describe("API test", func() {
    if runtime.GOOS != "linux" {
        Skip("test supported only on linux")
    }
    modelName := "codellama"

    modelName := "hermes-2-pro-mistral"
    response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
        URL: "github:go-skynet/model-gallery/codellama-7b-instruct.yaml",
        Name: modelName,
        Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128},
        ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
    })

    Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))

@@ -557,7 +555,7 @@ var _ = Describe("API test", func() {
    var res map[string]string
    err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
    Expect(err).ToNot(HaveOccurred())
    Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
    Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
    Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
    Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
})

@@ -597,7 +595,7 @@ var _ = Describe("API test", func() {
    Expect(err).ToNot(HaveOccurred())
    modelDir = filepath.Join(tmpdir, "models")
    backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
    err = os.Mkdir(backendAssetsDir, 0755)
    err = os.Mkdir(backendAssetsDir, 0750)
    Expect(err).ToNot(HaveOccurred())

    c, cancel = context.WithCancel(context.Background())
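
The updated test applies a model by pointing /models/apply at an embedded config URL rather than a gallery YAML plus overrides. A sketch of the equivalent HTTP call; the JSON field name config_url is an assumption inferred from the test's ConfigURL field, since the request struct's tags are outside this diff:

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "net/http"
)

func main() {
    payload, _ := json.Marshal(map[string]any{
        // Field name assumed from the modelApplyRequest struct in the test.
        "config_url": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
    })
    resp, err := http.Post("http://127.0.0.1:9090/models/apply", "application/json", bytes.NewReader(payload))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    var out map[string]any
    _ = json.NewDecoder(resp.Body).Decode(&out)
    fmt.Println(out["uuid"]) // job id to poll at /models/jobs/<uuid>
}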
285 core/http/elements/gallery.go Normal file
@@ -0,0 +1,285 @@
package elements

import (
    "fmt"

    "github.com/chasefleming/elem-go"
    "github.com/chasefleming/elem-go/attrs"
    "github.com/go-skynet/LocalAI/pkg/gallery"
    "github.com/go-skynet/LocalAI/pkg/xsync"
)

const (
    NoImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
)

func DoneProgress(uid, text string) string {
    return elem.Div(
        attrs.Props{},
        elem.H3(
            attrs.Props{
                "role": "status",
                "id": "pblabel",
                "tabindex": "-1",
                "autofocus": "",
            },
            elem.Text(text),
        ),
    ).Render()
}

func ErrorProgress(err string) string {
    return elem.Div(
        attrs.Props{},
        elem.H3(
            attrs.Props{
                "role": "status",
                "id": "pblabel",
                "tabindex": "-1",
                "autofocus": "",
            },
            elem.Text("Error"+err),
        ),
    ).Render()
}

func ProgressBar(progress string) string {
    return elem.Div(attrs.Props{
        "class": "progress",
        "role": "progressbar",
        "aria-valuemin": "0",
        "aria-valuemax": "100",
        "aria-valuenow": "0",
        "aria-labelledby": "pblabel",
    },
        elem.Div(attrs.Props{
            "id": "pb",
            "class": "progress-bar",
            "style": "width:" + progress + "%",
        }),
    ).Render()
}

func StartProgressBar(uid, progress, text string) string {
    if progress == "" {
        progress = "0"
    }
    return elem.Div(attrs.Props{
        "hx-trigger": "done",
        "hx-get": "/browse/job/" + uid,
        "hx-swap": "outerHTML",
        "hx-target": "this",
    },
        elem.H3(
            attrs.Props{
                "role": "status",
                "id": "pblabel",
                "tabindex": "-1",
                "autofocus": "",
            },
            elem.Text(text),
            // This is a simple example of how to use the HTMLX library to create a progress bar that updates every 600ms.
            elem.Div(attrs.Props{
                "hx-get": "/browse/job/progress/" + uid,
                "hx-trigger": "every 600ms",
                "hx-target": "this",
                "hx-swap": "innerHTML",
            },
                elem.Raw(ProgressBar(progress)),
            ),
        ),
    ).Render()
}

func cardSpan(text, icon string) elem.Node {
    return elem.Span(
        attrs.Props{
            "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
        },
        elem.I(attrs.Props{
            "class": icon + " pr-2",
        }),
        elem.Text(text),
    )
}

func ListModels(models []*gallery.GalleryModel, installing *xsync.SyncedMap[string, string]) string {
    //StartProgressBar(uid, "0")
    modelsElements := []elem.Node{}
    // span := func(s string) elem.Node {
    //     return elem.Span(
    //         attrs.Props{
    //             "class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs",
    //         },
    //         elem.Text(s),
    //     )
    // }
    deleteButton := func(m *gallery.GalleryModel) elem.Node {
        return elem.Button(
            attrs.Props{
                "data-twe-ripple-init": "",
                "data-twe-ripple-color": "light",
                "class": "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
                "hx-swap": "outerHTML",
                // post the Model ID as param
                "hx-post": "/browse/delete/model/" + m.Name,
            },
            elem.I(
                attrs.Props{
                    "class": "fa-solid fa-cancel pr-2",
                },
            ),
            elem.Text("Delete"),
        )
    }

    installButton := func(m *gallery.GalleryModel) elem.Node {
        return elem.Button(
            attrs.Props{
                "data-twe-ripple-init": "",
                "data-twe-ripple-color": "light",
                "class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
                "hx-swap": "outerHTML",
                // post the Model ID as param
                "hx-post": "/browse/install/model/" + fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name),
            },
            elem.I(
                attrs.Props{
                    "class": "fa-solid fa-download pr-2",
                },
            ),
            elem.Text("Install"),
        )
    }

    descriptionDiv := func(m *gallery.GalleryModel) elem.Node {

        return elem.Div(
            attrs.Props{
                "class": "p-6 text-surface dark:text-white",
            },
            elem.H5(
                attrs.Props{
                    "class": "mb-2 text-xl font-medium leading-tight",
                },
                elem.Text(m.Name),
            ),
            elem.P(
                attrs.Props{
                    "class": "mb-4 text-base",
                },
                elem.Text(m.Description),
            ),
        )
    }

    actionDiv := func(m *gallery.GalleryModel) elem.Node {
        galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name)
        currentlyInstalling := installing.Exists(galleryID)

        nodes := []elem.Node{
            cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"),
        }

        if m.License != "" {
            nodes = append(nodes,
                cardSpan("License: "+m.License, "fas fa-book"),
            )
        }

        for _, tag := range m.Tags {
            nodes = append(nodes,
                cardSpan(tag, "fas fa-tag"),
            )
        }

        for i, url := range m.URLs {
            nodes = append(nodes,
                elem.A(
                    attrs.Props{
                        "class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
                        "href": url,
                        "target": "_blank",
                    },
                    elem.I(attrs.Props{
                        "class": "fas fa-link pr-2",
                    }),
                    elem.Text("Link #"+fmt.Sprintf("%d", i+1)),
                ))
        }

        return elem.Div(
            attrs.Props{
                "class": "px-6 pt-4 pb-2",
            },
            elem.P(
                attrs.Props{
                    "class": "mb-4 text-base",
                },
                nodes...,
            ),
            elem.If(
                currentlyInstalling,
                elem.Node( // If currently installing, show progress bar
                    elem.Raw(StartProgressBar(installing.Get(galleryID), "0", "Installing")),
                ), // Otherwise, show install button (if not installed) or display "Installed"
                elem.If(m.Installed,
                    //elem.Node(elem.Div(
                    //    attrs.Props{},
                    //    span("Installed"), deleteButton(m),
                    //)),
                    deleteButton(m),
                    installButton(m),
                ),
            ),
        )
    }

    for _, m := range models {

        elems := []elem.Node{}

        if m.Icon == "" {
            m.Icon = NoImage
        }

        elems = append(elems,

            elem.Div(attrs.Props{
                "class": "flex justify-center items-center",
            },
                elem.A(attrs.Props{
                    "href": "#!",
                    // "class": "justify-center items-center",
                },
                    elem.Img(attrs.Props{
                        // "class": "rounded-t-lg object-fit object-center h-96",
                        "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3",
                        "src": m.Icon,
                    }),
                ),
            ))

        elems = append(elems, descriptionDiv(m), actionDiv(m))
        modelsElements = append(modelsElements,
            elem.Div(
                attrs.Props{
                    "class": " me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2",
                },
                elem.Div(
                    attrs.Props{
                        // "class": "p-6",
                    },
                    elems...,
                ),
            ),
        )
    }

    wrapper := elem.Div(attrs.Props{
        "class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark",
        //"class": "block rounded-lg bg-white shadow-secondary-1 dark:bg-surface-dark",
    }, modelsElements...)

    return wrapper.Render()
}
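
The progress helpers above drive an HTMX polling loop: StartProgressBar emits a container that refreshes /browse/job/progress/<uid> every 600ms until a "done" event fires, at which point /browse/job/<uid> is swapped in, typically returning DoneProgress. A minimal sketch that just prints the generated HTML fragments (the import path is inferred from the file location core/http/elements):

package main

import (
    "fmt"

    "github.com/go-skynet/LocalAI/core/http/elements"
)

func main() {
    // Initial state: progress bar polling the job with uid "1234".
    fmt.Println(elements.StartProgressBar("1234", "0", "Installing"))

    // Terminal state returned by the job endpoint once installation ends.
    fmt.Println(elements.DoneProgress("1234", "Installed"))
}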
84 core/http/endpoints/jina/rerank.go Normal file
@@ -0,0 +1,84 @@
package jina

import (
    "github.com/go-skynet/LocalAI/core/backend"
    "github.com/go-skynet/LocalAI/core/config"

    fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
    "github.com/go-skynet/LocalAI/core/schema"
    "github.com/go-skynet/LocalAI/pkg/grpc/proto"
    "github.com/go-skynet/LocalAI/pkg/model"
    "github.com/gofiber/fiber/v2"
    "github.com/rs/zerolog/log"
)

func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
    return func(c *fiber.Ctx) error {
        req := new(schema.JINARerankRequest)
        if err := c.BodyParser(req); err != nil {
            return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
                "error": "Cannot parse JSON",
            })
        }

        input := new(schema.TTSRequest)

        // Get input data from the request body
        if err := c.BodyParser(input); err != nil {
            return err
        }

        modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
        if err != nil {
            modelFile = input.Model
            log.Warn().Msgf("Model not found in context: %s", input.Model)
        }

        cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
            config.LoadOptionDebug(appConfig.Debug),
            config.LoadOptionThreads(appConfig.Threads),
            config.LoadOptionContextSize(appConfig.ContextSize),
            config.LoadOptionF16(appConfig.F16),
        )

        if err != nil {
            modelFile = input.Model
            log.Warn().Msgf("Model not found in context: %s", input.Model)
        } else {
            modelFile = cfg.Model
        }
        log.Debug().Msgf("Request for model: %s", modelFile)

        if input.Backend != "" {
            cfg.Backend = input.Backend
        }

        request := &proto.RerankRequest{
            Query: req.Query,
            TopN: int32(req.TopN),
            Documents: req.Documents,
        }

        results, err := backend.Rerank(cfg.Backend, modelFile, request, ml, appConfig, *cfg)
        if err != nil {
            return err
        }

        response := &schema.JINARerankResponse{
            Model: req.Model,
        }

        for _, r := range results.Results {
            response.Results = append(response.Results, schema.JINADocumentResult{
                Index: int(r.Index),
                Document: schema.JINAText{Text: r.Text},
                RelevanceScore: float64(r.RelevanceScore),
            })
        }

        response.Usage.TotalTokens = int(results.Usage.TotalTokens)
        response.Usage.PromptTokens = int(results.Usage.PromptTokens)

        return c.Status(fiber.StatusOK).JSON(response)
    }
}
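
The new endpoint accepts a JINA-style rerank request (query, documents, top_n) and forwards it to the rerank backend. A client-side sketch; the /v1/rerank path and the lowercase JSON field names are assumptions, since the route registration (RegisterJINARoutes, wired in app.go above) and the schema struct tags fall outside this hunk:

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "net/http"
)

func main() {
    payload, _ := json.Marshal(map[string]any{
        "model": "jina-reranker-v1-base-en", // illustrative model name
        "query": "Organic skincare products for sensitive skin",
        "documents": []string{
            "Organic skincare for sensitive skin with aloe vera",
            "New makeup trends focus on bold colors",
        },
        "top_n": 2,
    })
    // Path assumed; see the route registration note above.
    resp, err := http.Post("http://localhost:8080/v1/rerank", "application/json", bytes.NewReader(payload))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    var out map[string]any
    _ = json.NewDecoder(resp.Body).Decode(&out)
    fmt.Printf("%v\n", out["results"]) // index, document text, relevance score per result
}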
@@ -6,7 +6,7 @@ import (
    "github.com/gofiber/fiber/v2"
)

func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error {
func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
    return func(c *fiber.Ctx) error {

        input := new(schema.BackendMonitorRequest)

@@ -23,7 +23,7 @@ func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error
    }
}

func BackendShutdownEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error {
func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
    return func(c *fiber.Ctx) error {
        input := new(schema.BackendMonitorRequest)
        // Get input data from the request body
@@ -74,6 +74,27 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
|
||||
}
|
||||
}
|
||||
|
||||
func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
modelName := c.Params("name")
|
||||
|
||||
mgs.galleryApplier.C <- gallery.GalleryOp{
|
||||
Delete: true,
|
||||
GalleryName: modelName,
|
||||
}
|
||||
|
||||
uuid, err := uuid.NewUUID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return c.JSON(struct {
|
||||
ID string `json:"uuid"`
|
||||
StatusURL string `json:"status"`
|
||||
}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
|
||||
}
|
||||
}
|
||||
|
||||
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
|
||||
|
||||
32
core/http/endpoints/localai/welcome.go
Normal file
32
core/http/endpoints/localai/welcome.go
Normal file
@@ -0,0 +1,32 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/internal"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
)
|
||||
|
||||
func WelcomeEndpoint(appConfig *config.ApplicationConfig,
|
||||
cl *config.BackendConfigLoader, ml *model.ModelLoader) func(*fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
models, _ := ml.ListModels()
|
||||
backendConfigs := cl.GetAllBackendConfigs()
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI API - " + internal.PrintableVersion(),
|
||||
"Version": internal.PrintableVersion(),
|
||||
"Models": models,
|
||||
"ModelsConfig": backendConfigs,
|
||||
"ApplicationConfig": appConfig,
|
||||
}
|
||||
|
||||
if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
|
||||
// The client expects a JSON response
|
||||
return c.Status(fiber.StatusOK).JSON(summary)
|
||||
} else {
|
||||
// Render index
|
||||
return c.Render("views/index", summary)
|
||||
}
|
||||
}
|
||||
}
|
||||
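A quick sketch of the content negotiation `WelcomeEndpoint` performs: when the client does not accept HTML (or sends a JSON Content-Type), the summary is returned as JSON instead of the rendered index view. Assumes a LocalAI instance on the default port 8080.

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	req, err := http.NewRequest(http.MethodGet, "http://localhost:8080/", nil)
	if err != nil {
		panic(err)
	}
	// With no "html" entry in Accept, c.Accepts("html") is empty and the
	// handler takes the JSON branch.
	req.Header.Set("Accept", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // JSON summary: Title, Version, Models, ...
}
```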
@@ -455,21 +455,19 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
for i, assistant := range Assistants {
if assistant.ID == assistantID {
for j, fileId := range assistant.FileIDs {
if fileId == fileId {
Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...)
Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...)

// Check if the file exists in the assistantFiles slice
for i, assistantFile := range AssistantFiles {
if assistantFile.ID == fileId {
// Remove the file from the assistantFiles slice
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{
ID: fileId,
Object: "assistant.file.deleted",
Deleted: true,
})
}
// Check if the file exists in the assistantFiles slice
for i, assistantFile := range AssistantFiles {
if assistantFile.ID == fileId {
// Remove the file from the assistantFiles slice
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{
ID: fileId,
Object: "assistant.file.deleted",
Deleted: true,
})
}
}
}

@@ -3,10 +3,6 @@ package openai
import (
"encoding/json"
"fmt"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/stretchr/testify/assert"
"io"
"io/ioutil"
"net/http"
@@ -16,6 +12,11 @@ import (
"strings"
"testing"
"time"

"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/stretchr/testify/assert"
)

var configsDir string = "/tmp/localai/configs"
@@ -49,8 +50,8 @@ func TestAssistantEndpoints(t *testing.T) {
}

_ = os.RemoveAll(appConfig.ConfigsDir)
_ = os.MkdirAll(appConfig.ConfigsDir, 0755)
_ = os.MkdirAll(modelPath, 0755)
_ = os.MkdirAll(appConfig.ConfigsDir, 0750)
_ = os.MkdirAll(modelPath, 0750)
os.Create(filepath.Join(modelPath, "ggml-gpt4all-j"))

app := fiber.New(fiber.Config{

@@ -11,9 +11,8 @@ import (
"github.com/go-skynet/LocalAI/core/backend"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/grammar"
"github.com/go-skynet/LocalAI/pkg/functions"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/rs/zerolog/log"
@@ -68,8 +67,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
return true
})

results := parseFunctionCall(result, config.FunctionsConfig.ParallelCalls)
noActionToRun := len(results) > 0 && results[0].name == noAction
results := functions.ParseFunctionCall(result, config.FunctionsConfig)
noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0

switch {
case noActionToRun:
@@ -82,7 +81,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
responses <- initialMessage

result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt)
result, err := handleQuestion(config, req, ml, startupOptions, results, prompt)
if err != nil {
log.Error().Err(err).Msg("error handling question")
return
@@ -105,7 +104,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup

default:
for i, ss := range results {
name, args := ss.name, ss.arguments
name, args := ss.Name, ss.Arguments

initialMessage := schema.OpenAIResponse{
ID: id,
@@ -156,8 +155,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}

return func(c *fiber.Ctx) error {
processFunctions := false
funcs := grammar.Functions{}
modelFile, input, err := readRequest(c, ml, startupOptions, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
@@ -169,6 +166,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
log.Debug().Msgf("Configuration read: %+v", config)

funcs := input.Functions
shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions()

// Allow the user to set custom actions via config file
// to be "embedded" in each model
noActionName := "answer"
@@ -182,18 +182,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}

if input.ResponseFormat.Type == "json_object" {
input.Grammar = grammar.JSONBNF
input.Grammar = functions.JSONBNF
}

config.Grammar = input.Grammar

// process functions if we have any defined or if we have a function call string
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
if shouldUseFn {
log.Debug().Msgf("Response needs to process functions")
}

processFunctions = true

noActionGrammar := grammar.Function{
switch {
case !config.FunctionsConfig.NoGrammar && shouldUseFn:
noActionGrammar := functions.Function{
Name: noActionName,
Description: noActionDescription,
Parameters: map[string]interface{}{
@@ -206,7 +206,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}

// Append the no action function
funcs = append(funcs, input.Functions...)
if !config.FunctionsConfig.DisableNoAction {
funcs = append(funcs, noActionGrammar)
}
@@ -219,10 +218,17 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// Update input grammar
jsStruct := funcs.ToJSONStructure()
config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls)
} else if input.JSONFunctionGrammarObject != nil {
case input.JSONFunctionGrammarObject != nil:
config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls)
default:
// Force picking one of the functions by the request
if config.FunctionToCall() != "" {
funcs = funcs.Select(config.FunctionToCall())
}
}

// process functions if we have any defined or if we have a function call string

// functions are not supported in stream mode (yet?)
toStream := input.Stream

@@ -232,8 +238,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup

// If we are using the tokenizer template, we don't need to process the messages
// unless we are processing functions
if !config.TemplateConfig.UseTokenizerTemplate || processFunctions {

if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn {
suppressConfigSystemPrompt := false
mess := []string{}
for messageIndex, i := range input.Messages {
@@ -346,11 +351,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
templateFile = config.Model
}

if config.TemplateConfig.Chat != "" && !processFunctions {
if config.TemplateConfig.Chat != "" && !shouldUseFn {
templateFile = config.TemplateConfig.Chat
}

if config.TemplateConfig.Functions != "" && processFunctions {
if config.TemplateConfig.Functions != "" && shouldUseFn {
templateFile = config.TemplateConfig.Functions
}

@@ -370,7 +375,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}

log.Debug().Msgf("Prompt (after templating): %s", predInput)
if processFunctions {
if shouldUseFn && config.Grammar != "" {
log.Debug().Msgf("Grammar: %+v", config.Grammar)
}
}
@@ -388,7 +393,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup

responses := make(chan schema.OpenAIResponse)

if !processFunctions {
if !shouldUseFn {
go process(predInput, input, config, ml, responses)
} else {
go processTools(noActionName, predInput, input, config, ml, responses)
@@ -446,18 +451,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// no streaming mode
default:
result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
if !processFunctions {
if !shouldUseFn {
// no function is called, just reply and use stop as finish reason
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
return
}

results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls)
noActionsToRun := len(results) > 0 && results[0].name == noActionName
results := functions.ParseFunctionCall(s, config.FunctionsConfig)
noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0

switch {
case noActionsToRun:
result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput)
result, err := handleQuestion(config, input, ml, startupOptions, results, predInput)
if err != nil {
log.Error().Err(err).Msg("error handling question")
return
@@ -476,7 +481,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}

for _, ss := range results {
name, args := ss.name, ss.arguments
name, args := ss.Name, ss.Arguments
if len(input.Tools) > 0 {
// If we are using tools, we condense the function calls into
// a single response choice with all the tools
@@ -534,16 +539,20 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// Return the prediction in the response body
return c.JSON(resp)
}

}
}

func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) {
func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, funcResults []functions.FuncCallResults, prompt string) (string, error) {
log.Debug().Msgf("nothing to do, computing a reply")

arg := ""
if len(funcResults) > 0 {
arg = funcResults[0].Arguments
}
// If there is a message that the LLM already sends as part of the JSON reply, use it
arguments := map[string]interface{}{}
json.Unmarshal([]byte(args), &arguments)
if err := json.Unmarshal([]byte(arg), &arguments); err != nil {
log.Debug().Msg("handleQuestion: function result did not contain a valid JSON object")
}
m, exists := arguments["message"]
if exists {
switch message := m.(type) {
@@ -580,63 +589,3 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
}
return backend.Finetune(*config, prompt, prediction.Response), nil
}

type funcCallResults struct {
name string
arguments string
}

func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults {
results := []funcCallResults{}

// TODO: use generics to avoid this code duplication
if multipleResults {
ss := []map[string]interface{}{}
s := utils.EscapeNewLines(llmresult)
json.Unmarshal([]byte(s), &ss)
log.Debug().Msgf("Function return: %s %+v", s, ss)

for _, s := range ss {
func_name, ok := s["function"]
if !ok {
continue
}
args, ok := s["arguments"]
if !ok {
continue
}
d, _ := json.Marshal(args)
funcName, ok := func_name.(string)
if !ok {
continue
}
results = append(results, funcCallResults{name: funcName, arguments: string(d)})
}
} else {
// As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
ss := map[string]interface{}{}
// This prevent newlines to break JSON parsing for clients
s := utils.EscapeNewLines(llmresult)
json.Unmarshal([]byte(s), &ss)
log.Debug().Msgf("Function return: %s %+v", s, ss)

// The grammar defines the function name as "function", while OpenAI returns "name"
func_name, ok := ss["function"]
if !ok {
return results
}
// Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object
args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
if !ok {
return results
}
d, _ := json.Marshal(args)
funcName, ok := func_name.(string)
if !ok {
return results
}
results = append(results, funcCallResults{name: funcName, arguments: string(d)})
}

return results
}
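The local `parseFunctionCall` helper removed above is replaced by `functions.ParseFunctionCall`, which per the call sites in this diff takes the raw LLM output plus `config.FunctionsConfig` and returns a slice of `functions.FuncCallResults` with exported `Name`/`Arguments` fields. A sketch of how a caller might use it; the config type name and the `"function"`/`"arguments"` JSON keys are assumptions carried over from the removed code, not confirmed by this diff.

```go
package main

import (
	"fmt"

	"github.com/go-skynet/LocalAI/pkg/functions"
)

func main() {
	// A grammar-constrained LLM reply, in the shape the removed
	// single-result branch used to parse.
	llmResult := `{"function": "get_weather", "arguments": {"city": "Rome"}}`

	// Zero-value config assumed to mean: single result, grammar enabled.
	results := functions.ParseFunctionCall(llmResult, functions.FunctionsConfig{})

	for _, r := range results {
		// Arguments is the stringified JSON object handed to the backend.
		fmt.Printf("call %s with %s\n", r.Name, r.Arguments)
	}
}
```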
@@ -12,7 +12,7 @@ import (
"github.com/go-skynet/LocalAI/core/config"

"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/grammar"
"github.com/go-skynet/LocalAI/pkg/functions"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
@@ -70,7 +70,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
}

if input.ResponseFormat.Type == "json_object" {
input.Grammar = grammar.JSONBNF
input.Grammar = functions.JSONBNF
}

config.Grammar = input.Grammar

@@ -251,7 +251,7 @@ func newMultipartFile(filePath, tag, purpose string) (*strings.Reader, *multipar

// Helper to create test files
func createTestFile(t *testing.T, name string, sizeMB int, option *config.ApplicationConfig) *os.File {
err := os.MkdirAll(option.UploadDir, 0755)
err := os.MkdirAll(option.UploadDir, 0750)
if err != nil {

t.Fatalf("Error MKDIR: %v", err)

@@ -1,63 +1,23 @@
package openai

import (
"regexp"

"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/core/services"
"github.com/gofiber/fiber/v2"
)

func ListModelsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error {
func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
models, err := ml.ListModels()
if err != nil {
return err
}
var mm map[string]interface{} = map[string]interface{}{}

dataModels := []schema.OpenAIModel{}

var filterFn func(name string) bool
// If blank, no filter is applied.
filter := c.Query("filter")

// If filter is not specified, do not filter the list by model name
if filter == "" {
filterFn = func(_ string) bool { return true }
} else {
// If filter _IS_ specified, we compile it to a regex which is used to create the filterFn
rxp, err := regexp.Compile(filter)
if err != nil {
return err
}
filterFn = func(name string) bool {
return rxp.MatchString(name)
}
}

// By default, exclude any loose files that are already referenced by a configuration file.
excludeConfigured := c.QueryBool("excludeConfigured", true)

// Start with the known configurations
for _, c := range cl.GetAllBackendConfigs() {
if excludeConfigured {
mm[c.Model] = nil
}

if filterFn(c.Name) {
dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"})
}
dataModels, err := lms.ListModels(filter, excludeConfigured)
if err != nil {
return err
}

// Then iterate through the loose files:
for _, m := range models {
// And only adds them if they shouldn't be skipped.
if _, exists := mm[m]; !exists && filterFn(m) {
dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
}
}

return c.JSON(struct {
Object string `json:"object"`
Data []schema.OpenAIModel `json:"data"`

@@ -12,7 +12,7 @@ import (
"github.com/go-skynet/LocalAI/core/config"
fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/grammar"
"github.com/go-skynet/LocalAI/pkg/functions"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
@@ -145,7 +145,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
}

if input.ToolsChoice != nil {
var toolChoice grammar.Tool
var toolChoice functions.Tool

switch content := input.ToolsChoice.(type) {
case string:

@@ -7,12 +7,10 @@ import (
"net/http"

"github.com/Masterminds/sprig/v3"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/internal"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
fiberhtml "github.com/gofiber/template/html/v2"
"github.com/microcosm-cc/bluemonday"
"github.com/russross/blackfriday"
)

@@ -33,40 +31,6 @@ func notFoundHandler(c *fiber.Ctx) error {
return nil
}

func welcomeRoute(
app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error,
) {
if appConfig.DisableWelcomePage {
return
}

models, _ := ml.ListModels()
backendConfigs := cl.GetAllBackendConfigs()

app.Get("/", auth, func(c *fiber.Ctx) error {
summary := fiber.Map{
"Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(),
"Models": models,
"ModelsConfig": backendConfigs,
"ApplicationConfig": appConfig,
}

if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
// The client expects a JSON response
return c.Status(fiber.StatusOK).JSON(summary)
} else {
// Render index
return c.Render("views/index", summary)
}
})

}

func renderEngine() *fiberhtml.Engine {
engine := fiberhtml.NewFileSystem(http.FS(viewsfs), ".html")
engine.AddFuncMap(sprig.FuncMap())
@@ -76,5 +40,5 @@ func renderEngine() *fiberhtml.Engine {

func markDowner(args ...interface{}) template.HTML {
s := blackfriday.MarkdownCommon([]byte(fmt.Sprintf("%s", args...)))
return template.HTML(s)
return template.HTML(bluemonday.UGCPolicy().Sanitize(string(s)))
}

19
core/http/routes/elevenlabs.go
Normal file
@@ -0,0 +1,19 @@
package routes

import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
)

func RegisterElevenLabsRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {

// Elevenlabs
app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))

}
19
core/http/routes/jina.go
Normal file
@@ -0,0 +1,19 @@
package routes

import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http/endpoints/jina"

"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
)

func RegisterJINARoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {

// POST endpoint to mimic the reranking
app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig))
}
65
core/http/routes/localai.go
Normal file
@@ -0,0 +1,65 @@
package routes

import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
"github.com/go-skynet/LocalAI/core/services"
"github.com/go-skynet/LocalAI/internal"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/swagger"
)

func RegisterLocalAIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
galleryService *services.GalleryService,
auth func(*fiber.Ctx) error) {

app.Get("/swagger/*", swagger.HandlerDefault) // default

// LocalAI API endpoints

modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
app.Post("/models/delete/:name", auth, modelGalleryEndpointService.DeleteModelGalleryEndpoint())

app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())

app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))

// Stores
sl := model.NewModelLoader("")
app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))

// Kubernetes health checks
ok := func(c *fiber.Ctx) error {
return c.SendStatus(200)
}

app.Get("/healthz", ok)
app.Get("/readyz", ok)

app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())

// Experimental Backend Statistics Module
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitorService))
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitorService))

app.Get("/version", auth, func(c *fiber.Ctx) error {
return c.JSON(struct {
Version string `json:"version"`
}{Version: internal.PrintableVersion()})
})

}
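Note that `/healthz` and `/readyz` are registered without the `auth` middleware, which makes them usable as Kubernetes liveness/readiness probes. A trivial probe client against the default port:

```go
package main

import (
	"fmt"
	"net/http"
)

func main() {
	// Both endpoints are unauthenticated in RegisterLocalAIRoutes above.
	for _, path := range []string{"/healthz", "/readyz"} {
		resp, err := http.Get("http://localhost:8080" + path)
		if err != nil {
			panic(err)
		}
		resp.Body.Close()
		fmt.Println(path, "->", resp.StatusCode) // expected: 200
	}
}
```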
88
core/http/routes/openai.go
Normal file
@@ -0,0 +1,88 @@
package routes

import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
"github.com/go-skynet/LocalAI/core/services"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
)

func RegisterOpenAIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {
// openAI compatible API endpoint

// chat
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))

// edit
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))

// assistant
app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))

// files
app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))

// completion
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))

// embeddings
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))

// audio
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))

// images
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))

if appConfig.ImageDir != "" {
app.Static("/generated-images", appConfig.ImageDir)
}

if appConfig.AudioDir != "" {
app.Static("/generated-audio", appConfig.AudioDir)
}

// models
tmpLMS := services.NewListModelsService(ml, cl, appConfig) // TODO: once createApplication() is fully in use, reference the central instance.
app.Get("/v1/models", auth, openai.ListModelsEndpoint(tmpLMS))
app.Get("/models", auth, openai.ListModelsEndpoint(tmpLMS))
}
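The models listing wired up at the end keeps the `filter` and `excludeConfigured` query parameters seen in the refactored `ListModelsEndpoint` earlier in this diff (`filter` was compiled into a regular expression in the old endpoint code, and the refactor presumably preserves that behavior inside `ListModelsService`). A sketch of querying it:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

type modelList struct {
	Object string `json:"object"`
	Data   []struct {
		ID     string `json:"id"`
		Object string `json:"object"`
	} `json:"data"`
}

func main() {
	// excludeConfigured defaults to true; passing false also lists loose
	// model files already referenced by a configuration.
	resp, err := http.Get("http://localhost:8080/v1/models?filter=llama&excludeConfigured=false")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var models modelList
	if err := json.NewDecoder(resp.Body).Decode(&models); err != nil {
		panic(err)
	}
	for _, m := range models.Data {
		fmt.Println(m.ID)
	}
}
```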
273
core/http/routes/ui.go
Normal file
@@ -0,0 +1,273 @@
package routes

import (
"fmt"
"html/template"
"strings"

"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http/elements"
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
"github.com/go-skynet/LocalAI/core/services"
"github.com/go-skynet/LocalAI/internal"
"github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/xsync"

"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
)

func RegisterUIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
galleryService *services.GalleryService,
auth func(*fiber.Ctx) error) {

app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml))

// keeps the state of models that are being installed from the UI
var installingModels = xsync.NewSyncedMap[string, string]()

// Show the Models page (all models)
app.Get("/browse", auth, func(c *fiber.Ctx) error {
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)

summary := fiber.Map{
"Title": "LocalAI - Models",
"Version": internal.PrintableVersion(),
"Models": template.HTML(elements.ListModels(models, installingModels)),
"Repositories": appConfig.Galleries,
// "ApplicationConfig": appConfig,
}

// Render index
return c.Render("views/models", summary)
})

// Show the models, filtered from the user input
// https://htmx.org/examples/active-search/
app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error {
form := struct {
Search string `form:"search"`
}{}
if err := c.BodyParser(&form); err != nil {
return c.Status(fiber.StatusBadRequest).SendString(err.Error())
}

models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)

filteredModels := []*gallery.GalleryModel{}
for _, m := range models {
if strings.Contains(m.Name, form.Search) ||
strings.Contains(m.Description, form.Search) ||
strings.Contains(m.Gallery.Name, form.Search) ||
strings.Contains(strings.Join(m.Tags, ","), form.Search) {
filteredModels = append(filteredModels, m)
}
}

return c.SendString(elements.ListModels(filteredModels, installingModels))
})

/*

Install routes

*/

// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
// https://htmx.org/examples/progress-bar/
app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error {
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!

id, err := uuid.NewUUID()
if err != nil {
return err
}

uid := id.String()

installingModels.Set(galleryID, uid)

op := gallery.GalleryOp{
Id: uid,
GalleryName: galleryID,
Galleries: appConfig.Galleries,
}
go func() {
galleryService.C <- op
}()

return c.SendString(elements.StartProgressBar(uid, "0", "Installation"))
})

// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
// https://htmx.org/examples/progress-bar/
app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error {
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!

id, err := uuid.NewUUID()
if err != nil {
return err
}

uid := id.String()

installingModels.Set(galleryID, uid)

op := gallery.GalleryOp{
Id: uid,
Delete: true,
GalleryName: galleryID,
}
go func() {
galleryService.C <- op
}()

return c.SendString(elements.StartProgressBar(uid, "0", "Deletion"))
})

// Display the job current progress status
// If the job is done, we trigger the /browse/job/:uid route
// https://htmx.org/examples/progress-bar/
app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error {
jobUID := c.Params("uid")

status := galleryService.GetStatus(jobUID)
if status == nil {
//fmt.Errorf("could not find any status for ID")
return c.SendString(elements.ProgressBar("0"))
}

if status.Progress == 100 {
c.Set("HX-Trigger", "done") // this triggers /browse/job/:uid (which is when the job is done)
return c.SendString(elements.ProgressBar("100"))
}
if status.Error != nil {
return c.SendString(elements.ErrorProgress(status.Error.Error()))
}

return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress)))
})

// this route is hit when the job is done, and we display the
// final state (for now just displays "Installation completed")
app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error {

status := galleryService.GetStatus(c.Params("uid"))

for _, k := range installingModels.Keys() {
if installingModels.Get(k) == c.Params("uid") {
installingModels.Delete(k)
}
}

displayText := "Installation completed"
if status.Deletion {
displayText = "Deletion completed"
}

return c.SendString(elements.DoneProgress(c.Params("uid"), displayText))
})

// Show the Chat page
app.Get("/chat/:model", auth, func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()

summary := fiber.Map{
"Title": "LocalAI - Chat with " + c.Params("model"),
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
}

// Render index
return c.Render("views/chat", summary)
})
app.Get("/chat/", auth, func(c *fiber.Ctx) error {

backendConfigs := cl.GetAllBackendConfigs()

if len(backendConfigs) == 0 {
return c.SendString("No models available")
}

summary := fiber.Map{
"Title": "LocalAI - Chat with " + backendConfigs[0].Name,
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name,
"Version": internal.PrintableVersion(),
}

// Render index
return c.Render("views/chat", summary)
})

app.Get("/text2image/:model", auth, func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()

summary := fiber.Map{
"Title": "LocalAI - Generate images with " + c.Params("model"),
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
}

// Render index
return c.Render("views/text2image", summary)
})

app.Get("/text2image/", auth, func(c *fiber.Ctx) error {

backendConfigs := cl.GetAllBackendConfigs()

if len(backendConfigs) == 0 {
return c.SendString("No models available")
}

summary := fiber.Map{
"Title": "LocalAI - Generate images with " + backendConfigs[0].Name,
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name,
"Version": internal.PrintableVersion(),
}

// Render index
return c.Render("views/text2image", summary)
})

app.Get("/tts/:model", auth, func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()

summary := fiber.Map{
"Title": "LocalAI - Generate images with " + c.Params("model"),
"ModelsConfig": backendConfigs,
"Model": c.Params("model"),
"Version": internal.PrintableVersion(),
}

// Render index
return c.Render("views/tts", summary)
})

app.Get("/tts/", auth, func(c *fiber.Ctx) error {

backendConfigs := cl.GetAllBackendConfigs()

if len(backendConfigs) == 0 {
return c.SendString("No models available")
}

summary := fiber.Map{
"Title": "LocalAI - Generate audio with " + backendConfigs[0].Name,
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name,
"Version": internal.PrintableVersion(),
}

// Render index
return c.Render("views/tts", summary)
})
}
137
core/http/static/chat.js
Normal file
@@ -0,0 +1,137 @@
/*

https://github.com/david-haerer/chatapi

MIT License

Copyright (c) 2023 David Härer
Copyright (c) 2024 Ettore Di Giacinto

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

*/
function submitKey(event) {
event.preventDefault();
localStorage.setItem("key", document.getElementById("apiKey").value);
document.getElementById("apiKey").blur();
}

function submitPrompt(event) {
event.preventDefault();

const input = document.getElementById("input").value;
Alpine.store("chat").add("user", input);
document.getElementById("input").value = "";
const key = localStorage.getItem("key");

promptGPT(key, input);
}


async function promptGPT(key, input) {
const model = document.getElementById("chat-model").value;
// Set class "loader" to the element with "loader" id
//document.getElementById("loader").classList.add("loader");
// Make the "loader" visible
document.getElementById("loader").style.display = "block";
document.getElementById("input").disabled = true;
document.getElementById('messages').scrollIntoView(false)

// Source: https://stackoverflow.com/a/75751803/11386095
const response = await fetch("/v1/chat/completions", {
method: "POST",
headers: {
Authorization: `Bearer ${key}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
model: model,
messages: Alpine.store("chat").messages(),
stream: true,
}),
});

if (!response.ok) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: POST /v1/chat/completions ${response.status}</span>`,
);
return;
}

const reader = response.body
?.pipeThrough(new TextDecoderStream())
.getReader();

if (!reader) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to decode API response</span>`,
);
return;
}

while (true) {
const { value, done } = await reader.read();
if (done) break;
let dataDone = false;
const arr = value.split("\n");
arr.forEach((data) => {
if (data.length === 0) return;
if (data.startsWith(":")) return;
if (data === "data: [DONE]") {
dataDone = true;
return;
}
const token = JSON.parse(data.substring(6)).choices[0].delta.content;
if (!token) {
return;
}
hljs.highlightAll();
Alpine.store("chat").add("assistant", token);
document.getElementById('messages').scrollIntoView(false)
});
hljs.highlightAll();
if (dataDone) break;
}
// Remove class "loader" from the element with "loader" id
//document.getElementById("loader").classList.remove("loader");
document.getElementById("loader").style.display = "none";
// enable input
document.getElementById("input").disabled = false;
// scroll to the bottom of the chat
document.getElementById('messages').scrollIntoView(false)
// set focus to the input
document.getElementById("input").focus();
}

document.getElementById("key").addEventListener("submit", submitKey);
document.getElementById("prompt").addEventListener("submit", submitPrompt);
document.getElementById("input").focus();

const storeKey = localStorage.getItem("key");
if (storeKey) {
document.getElementById("apiKey").value = storeKey;
}

marked.setOptions({
highlight: function (code) {
return hljs.highlightAuto(code).value;
},
});
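The streaming loop in chat.js above consumes server-sent events: each line is prefixed with `data: `, and the stream ends with the `data: [DONE]` sentinel. The same protocol can be consumed from Go; a minimal sketch, assuming a server on the default port and any configured chat model name:

```go
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
)

// Only the delta content is needed from each streamed chunk.
type chunk struct {
	Choices []struct {
		Delta struct {
			Content string `json:"content"`
		} `json:"delta"`
	} `json:"choices"`
}

func main() {
	body, _ := json.Marshal(map[string]any{
		"model":    "gpt-4", // hypothetical: substitute a configured model
		"stream":   true,
		"messages": []map[string]string{{"role": "user", "content": "Hello!"}},
	})
	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		// Skip blank keep-alives, ":" comments, and the end sentinel,
		// mirroring the checks in chat.js.
		if !strings.HasPrefix(line, "data: ") || line == "data: [DONE]" {
			continue
		}
		var c chunk
		if err := json.Unmarshal([]byte(line[6:]), &c); err != nil {
			continue
		}
		if len(c.Choices) > 0 {
			fmt.Print(c.Choices[0].Delta.Content)
		}
	}
}
```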
93
core/http/static/general.css
Normal file
@@ -0,0 +1,93 @@
body {
font-family: 'Inter', sans-serif;
}
.chat-container { height: 90vh; display: flex; flex-direction: column; }
.chat-messages { overflow-y: auto; flex-grow: 1; }
.htmx-indicator{
opacity:0;
transition: opacity 10ms ease-in;
}
.htmx-request .htmx-indicator{
opacity:1
}
/* Loader (https://cssloaders.github.io/) */
.loader {
width: 12px;
height: 12px;
border-radius: 50%;
display: block;
margin:15px auto;
position: relative;
color: #FFF;
box-sizing: border-box;
animation: animloader 2s linear infinite;
}

@keyframes animloader {
0% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; }
25% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 2px; }
50% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 2px, -38px 0 0 -2px; }
75% { box-shadow: 14px 0 0 2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; }
100% { box-shadow: 14px 0 0 -2px, 38px 0 0 2px, -14px 0 0 -2px, -38px 0 0 -2px; }
}
.progress {
height: 20px;
margin-bottom: 20px;
overflow: hidden;
background-color: #f5f5f5;
border-radius: 4px;
box-shadow: inset 0 1px 2px rgba(0,0,0,.1);
}
.progress-bar {
float: left;
width: 0%;
height: 100%;
font-size: 12px;
line-height: 20px;
color: #fff;
text-align: center;
background-color: #337ab7;
-webkit-box-shadow: inset 0 -1px 0 rgba(0,0,0,.15);
box-shadow: inset 0 -1px 0 rgba(0,0,0,.15);
-webkit-transition: width .6s ease;
-o-transition: width .6s ease;
transition: width .6s ease;
}

.user {
background-color: #007bff;
}

.assistant {
background-color: #28a745;
}

.message {
display: flex;
align-items: center;
}

.user, .assistant {
flex-grow: 1;
margin: 0.5rem;
}

ul {
list-style-type: disc; /* Adds bullet points */
padding-left: 1.25rem; /* Indents the list from the left margin */
margin-top: 1rem; /* Space above the list */
}

li {
font-size: 0.875rem; /* Small text size */
color: #4a5568; /* Dark gray text */
background-color: #f7fafc; /* Very light gray background */
border-radius: 0.375rem; /* Rounded corners */
padding: 0.5rem; /* Padding inside each list item */
box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); /* Subtle shadow */
margin-bottom: 0.5rem; /* Vertical space between list items */
}

li:last-child {
margin-bottom: 0; /* Removes bottom margin from the last item */
}
96
core/http/static/image.js
Normal file
@@ -0,0 +1,96 @@
/*

https://github.com/david-haerer/chatapi

MIT License

Copyright (c) 2023 David Härer
Copyright (c) 2024 Ettore Di Giacinto

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

*/
function submitKey(event) {
event.preventDefault();
localStorage.setItem("key", document.getElementById("apiKey").value);
document.getElementById("apiKey").blur();
}


function genImage(event) {
event.preventDefault();
const input = document.getElementById("input").value;
const key = localStorage.getItem("key");

promptDallE(key, input);

}

async function promptDallE(key, input) {
document.getElementById("loader").style.display = "block";
document.getElementById("input").value = "";
document.getElementById("input").disabled = true;

const model = document.getElementById("image-model").value;
const response = await fetch("/v1/images/generations", {
method: "POST",
headers: {
Authorization: `Bearer ${key}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
model: model,
steps: 10,
prompt: input,
n: 1,
size: "512x512",
}),
});
const json = await response.json();
if (json.error) {
// Display error if there is one
var div = document.getElementById('result'); // Get the div by its ID
div.innerHTML = '<p style="color:red;">' + json.error.message + '</p>';
return;
}
const url = json.data[0].url;

var div = document.getElementById('result'); // Get the div by its ID
var img = document.createElement('img'); // Create a new img element
img.src = url; // Set the source of the image
img.alt = 'Generated image'; // Set the alt text of the image

div.innerHTML = ''; // Clear the existing content of the div
div.appendChild(img); // Add the new img element to the div

document.getElementById("loader").style.display = "none";
document.getElementById("input").disabled = false;
document.getElementById("input").focus();
}

document.getElementById("key").addEventListener("submit", submitKey);
document.getElementById("input").focus();
document.getElementById("genimage").addEventListener("submit", genImage);
document.getElementById("loader").style.display = "none";

const storeKey = localStorage.getItem("key");
if (storeKey) {
document.getElementById("apiKey").value = storeKey;
}
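The same image-generation request that image.js sends can be issued from Go; a sketch with the same payload, where "stablediffusion" is a hypothetical model name standing in for whatever image backend is configured:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	body, _ := json.Marshal(map[string]any{
		"model":  "stablediffusion", // hypothetical model name
		"prompt": "a painting of a lighthouse at dawn",
		"steps":  10,
		"n":      1,
		"size":   "512x512",
	})
	resp, err := http.Post("http://localhost:8080/v1/images/generations", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out struct {
		Data []struct {
			URL string `json:"url"`
		} `json:"data"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	// The URL is served under /generated-images when ImageDir is set.
	fmt.Println(out.Data[0].URL)
}
```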
64
core/http/static/tts.js
Normal file
64
core/http/static/tts.js
Normal file
@@ -0,0 +1,64 @@
function submitKey(event) {
  event.preventDefault();
  localStorage.setItem("key", document.getElementById("apiKey").value);
  document.getElementById("apiKey").blur();
}

function genAudio(event) {
  event.preventDefault();
  const input = document.getElementById("input").value;
  const key = localStorage.getItem("key");

  tts(key, input);
}

async function tts(key, input) {
  document.getElementById("loader").style.display = "block";
  document.getElementById("input").value = "";
  document.getElementById("input").disabled = true;

  const model = document.getElementById("tts-model").value;
  const response = await fetch("/tts", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${key}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: model,
      input: input,
    }),
  });
  if (!response.ok) {
    const jsonData = await response.json(); // Safely parse the JSON error body
    var div = document.getElementById('result');
    div.innerHTML = '<p style="color:red;">Error: ' + jsonData.error.message + '</p>';
    return;
  }

  var div = document.getElementById('result'); // Get the div by its ID
  var link = document.createElement('a');
  link.className = "m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong";
  link.innerHTML = "<i class='fa-solid fa-download'></i> Download result";
  const blob = await response.blob();
  link.href = window.URL.createObjectURL(blob);

  div.innerHTML = '';    // Clear the existing content of the div
  div.appendChild(link); // Add the new link element to the div
  console.log(link);
  document.getElementById("loader").style.display = "none";
  document.getElementById("input").disabled = false;
  document.getElementById("input").focus();
}

document.getElementById("key").addEventListener("submit", submitKey);
document.getElementById("input").focus();
document.getElementById("tts").addEventListener("submit", genAudio);
document.getElementById("loader").style.display = "none";

const storeKey = localStorage.getItem("key");
if (storeKey) {
  document.getElementById("apiKey").value = storeKey;
}
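The same /tts route can be driven outside this page. A minimal sketch reusing the request shape above; playing the returned audio (rather than offering a download link) is an assumption for the example:

    // Minimal sketch: call the /tts route above and play the returned audio.
    async function speak(key, model, input) {
      const response = await fetch("/tts", {
        method: "POST",
        headers: {
          Authorization: `Bearer ${key}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ model: model, input: input }),
      });
      if (!response.ok) {
        const err = await response.json();
        throw new Error(err.error.message); // same error shape the page renders
      }
      const blob = await response.blob(); // binary audio, as in the handler above
      new Audio(URL.createObjectURL(blob)).play();
    }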
core/http/views/chat.html (new file, 202 lines)
@@ -0,0 +1,202 @@
<!--

Part of this page is based on the OpenAI Chatbot example by David Härer:
https://github.com/david-haerer/chatapi

MIT License Copyright (c) 2023 David Härer
            Copyright (c) 2024 Ettore Di Giacinto

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

-->
<!doctype html>
<html lang="en">
{{template "views/partials/head" .}}
<script defer src="/static/chat.js"></script>
<style>
  body {
    overflow: hidden;
  }
</style>
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
  <div class="flex flex-col min-h-screen">

    {{template "views/partials/navbar"}}
    <div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg">
      <!-- Chat Header -->
      <div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">

        <div class="flex items-center justify-between">

          <h1 class="text-lg font-semibold"><i class="fa-solid fa-comments"></i> Chat with {{.Model}} <a href="https://localai.io/features/text-generation/" target="_blank">
            <i class="fas fa-circle-info pr-2"></i>
          </a></h1>
          <div x-show="component === 'menu'" id="menu">
            <button
              @click="$store.chat.clear()"
              id="clear"
              title="Clear chat history"
              data-twe-ripple-init
              data-twe-ripple-color="light"
              class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
            >
              Clear chat 🔥
            </button>
            <button @click="component = 'key'" title="Update API key"
              class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
            >Set API Key🔑</button>
          </div>
          <form x-show="component === 'key'" id="key">
            <input
              type="password"
              id="apiKey"
              name="apiKey"
              placeholder="OpenAI API Key"
              x-model.lazy="key"
            />
            <button @click="component = 'menu'" type="submit" title="Save API key">
              🔒
            </button>
          </form>

          <select x-data="{ link : '' }" x-model="link" x-init="$watch('link', value => window.location = link)"
            class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
          >
            <!-- Options -->
            <option value="" disabled class="text-gray-400">Select a model</option>
            {{ $model := .Model }}
            {{ range .ModelsConfig }}
              {{ if eq .Name $model }}
                <option value="/chat/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
              {{ else }}
                <option value="/chat/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
              {{ end }}
            {{ end }}
          </select>

        </div>
      </div>

      <div class="chat-messages p-4" id="chat" x-data="{history: $store.chat.history}">
        <p id="usage" x-show="history.length === 0">
          Start chatting with the AI by typing a prompt in the input field below.
        </p>
        <div id="messages">
          <template x-for="message in history">
            <div class="message flex items-start space-x-2 my-2">
              <!--<img :src="message.role === 'user' ? '/path/to/user-icon.png' : '/path/to/bot-icon.png'" alt="" class="h-6 w-6">-->
              <i class="fa-solid h-8 w-8" :class="message.role === 'user' ? 'fa-user' : 'fa-robot'"></i>
              <div class="flex flex-col flex-1">
                <span class="text-xs font-semibold text-gray-600" x-text="message.role === 'user' ? 'User' : 'Assistant ({{.Model}})'"></span>
                <template x-if="message.role === 'user'">
                  <div class="p-2 flex-1 rounded" :class="message.role" x-html="message.html"></div>
                </template>
                <template x-if="message.role === 'assistant'">
                  <div class="p-2 flex-1 rounded" :class="message.role" x-html="message.html"></div>
                </template>
              </div>
            </div>
          </template>
        </div>
      </div>

      <div class="p-4 border-t border-gray-700" x-data="{ inputValue: '', shiftPressed: false }">
        <div id="loader" class="my-2 loader" style="display: none;"></div>
        <input id="chat-model" type="hidden" value="{{.Model}}">
        <form id="prompt" action="/chat/{{.Model}}" method="get" @submit.prevent="submitPrompt">
          <div class="relative w-full">
            <textarea
              id="input"
              name="input"
              x-model="inputValue"
              placeholder="Send a message..."
              class="p-2 pl-2 border rounded w-full bg-gray-600 text-white placeholder-gray-300"
              required
              @keydown.shift="shiftPressed = true"
              @keyup.shift="shiftPressed = false"
              @keydown.enter="if (!shiftPressed) { submitPrompt($event); }"
              style="padding-right: 4rem;"
            ></textarea>
            <button type="submit"><i class="fa-solid fa-circle-up text-gray-300 absolute right-2 top-3 text-lg p-2 ml-2"></i></button>
          </div>
        </form>
      </div>
      <script>
        document.addEventListener("alpine:init", () => {
          Alpine.store("chat", {
            history: [],
            languages: [undefined],
            clear() {
              this.history.length = 0;
            },
            add(role, content) {
              const N = this.history.length - 1;
              if (this.history.length && this.history[N].role === role) {
                this.history[N].content += content;
                this.history[N].html = DOMPurify.sanitize(
                  marked.parse(this.history[N].content),
                );
              } else {
                let c = "";
                // split the content on newlines
                const lines = content.split("\n");
                // for each line, run DOMPurify.sanitize(marked.parse(line)) and append it to c
                lines.forEach((line) => {
                  c += DOMPurify.sanitize(marked.parse(line));
                });

                this.history.push({
                  role: role,
                  content: content,
                  html: c,
                });
              }

              const parser = new DOMParser();
              const html = parser.parseFromString(
                this.history[this.history.length - 1].html,
                "text/html",
              );
              const code = html.querySelectorAll("pre code");
              if (!code.length) return;
              code.forEach((el) => {
                const language = el.className.split("language-")[1];
                if (this.languages.includes(language)) return;
                const script = document.createElement("script");
                script.src = `https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/languages/${language}.min.js`;
                document.head.appendChild(script);
                this.languages.push(language);
              });
            },
            messages() {
              return this.history.map((message) => {
                return {
                  role: message.role,
                  content: message.content,
                };
              });
            },
          });
        });
      </script>
    </div>
  </div>
</body>
</html>
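One consequence of the add() logic above: consecutive calls with the same role are merged into a single history entry, which is how streamed token deltas accumulate into one assistant bubble instead of spawning a new one per chunk. A minimal sketch, assuming the store above has been registered and marked/DOMPurify are loaded:

    // Minimal sketch: same-role add() calls extend the previous history entry.
    const chat = Alpine.store("chat");
    chat.add("user", "Hi!");
    chat.add("assistant", "Hello");
    chat.add("assistant", ", world."); // appended to the previous assistant entry
    console.log(chat.messages());
    // -> [{role: "user", content: "Hi!"}, {role: "assistant", content: "Hello, world."}]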
core/http/views/models.html (new file, 34 lines)
@@ -0,0 +1,34 @@
<!DOCTYPE html>
<html lang="en">
{{template "views/partials/head" .}}

<body class="bg-gray-900 text-gray-200">
  <div class="flex flex-col min-h-screen">

    {{template "views/partials/navbar" .}}
    <div class="container mx-auto px-4 flex-grow">

      <div class="models mt-12">
        <h2 class="text-center text-3xl font-semibold text-gray-100">
          🖼️ Available models from <i>{{ len .Repositories }}</i> repositories <a href="https://localai.io/models/" target="_blank">
          <i class="fas fa-circle-info pr-2"></i>
        </a></h2>

        <span class="htmx-indicator loader"></span>
        <input class="form-control appearance-none block w-full px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
          name="search" placeholder="Begin typing to search models..."
          hx-post="/browse/search/models"
          hx-trigger="input changed delay:500ms, search"
          hx-target="#search-results"
          hx-indicator=".htmx-indicator">

        <div id="search-results">{{.Models}}</div>
      </div>
    </div>

    {{template "views/partials/footer" .}}
  </div>

</body>
</html>
core/http/views/partials/head.html
@@ -2,12 +2,51 @@
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{.Title}}</title>
<link
  rel="stylesheet"
  href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css"
/>
<script
  defer
  src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"
></script>
<script
  defer
  src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
></script>
<script
  defer
  src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"
></script>
<script
  defer
  src="https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"
></script>

<link href="/static/general.css" rel="stylesheet" />
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
<link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
<link
  href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap"
  rel="stylesheet" />
<link
  rel="stylesheet"
  href="https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css" />
<script src="https://cdn.tailwindcss.com/3.3.0"></script>
<script>
  tailwind.config = {
    darkMode: "class",
    theme: {
      fontFamily: {
        sans: ["Roboto", "sans-serif"],
        body: ["Roboto", "sans-serif"],
        mono: ["ui-monospace", "monospace"],
      },
    },
    corePlugins: {
      preflight: false,
    },
  };
</script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.1.1/css/all.min.css">
<style>
  body {
    font-family: 'Inter', sans-serif;
  }
</style>
<script src="https://unpkg.com/htmx.org@1.9.12" integrity="sha384-ujb1lZYygJmzgSwoxRggbCHcjc0rB2XoQrxeTUQyRjrOnlCoYta87iKBWq3EsdM2" crossorigin="anonymous"></script>
</head>
core/http/views/partials/navbar.html
@@ -9,6 +9,10 @@
<div>
  <a href="/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
  <a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank"><i class="fas fa-book-reader pr-2"></i> Documentation</a>
  <a href="/browse/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
  <a href="/chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
  <a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
  <a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS</a>
  <a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
</div>
</div>
core/http/views/text2image.html (new file, 89 lines)
@@ -0,0 +1,89 @@
<!DOCTYPE html>
<html lang="en">
{{template "views/partials/head" .}}
<script defer src="/static/image.js"></script>

<body class="bg-gray-900 text-gray-200">
  <div class="flex flex-col min-h-screen">

    {{template "views/partials/navbar" .}}
    <div class="container mx-auto px-4 flex-grow" x-data="{ component: 'menu' }">

      <div class="mt-12">
        <div class="flex items-center justify-center text-center pb-2">
          <span class="text-3xl font-semibold text-gray-100">
            🖼️ Text to Image
            <a href="https://localai.io/models/" target="_blank">
              <i class="fas fa-circle-info pr-2"></i>
            </a>
          </span>
        </div>

        <div class="text-center font-semibold text-gray-100">
          <div class="flex items-center justify-between">

            <div x-show="component === 'menu'" id="menu">
              <button @click="component = 'key'" title="Update API key"
                class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
              >Set API Key🔑</button>
            </div>
            <form x-show="component === 'key'" id="key">
              <input
                type="password"
                id="apiKey"
                name="apiKey"
                placeholder="OpenAI API Key"
                x-model.lazy="key"
              />
              <button @click="component = 'menu'" type="submit" title="Save API key">
                🔒
              </button>
            </form>

            <select x-data="{ link : '' }" x-model="link" x-init="$watch('link', value => window.location = link)"
              class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
            >
              <!-- Options -->
              <option value="" disabled class="text-gray-400">Select a model</option>
              {{ $model := .Model }}
              {{ range .ModelsConfig }}
                {{ if eq .Name $model }}
                  <option value="/text2image/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
                {{ else }}
                  <option value="/text2image/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
                {{ end }}
              {{ end }}
            </select>

          </div>
        </div>

        <div class="mt-12">
          <input id="image-model" type="hidden" value="{{.Model}}">
          <form id="genimage" action="/text2image/{{.Model}}" method="get">
            <input
              type="text"
              id="input"
              name="input"
              placeholder="Prompt…"
              autocomplete="off"
              class="p-2 border rounded w-full bg-gray-600 text-white placeholder-gray-300"
              required
            />
          </form>
          <div class="container max-w-screen-lg mx-auto mt-4 pb-10 flex justify-center">
            <div id="loader" class="my-2 loader"></div>
          </div>
          <div class="container max-w-screen-lg mx-auto mt-4 pb-10 flex justify-center">
            <div id="result" class="mx-auto"></div>
          </div>
        </div>
      </div>
    </div>

    {{template "views/partials/footer" .}}
  </div>
</body>
</html>
core/http/views/tts.html (new file, 86 lines)
@@ -0,0 +1,86 @@
<!DOCTYPE html>
<html lang="en">
{{template "views/partials/head" .}}
<script defer src="/static/tts.js"></script>

<body class="bg-gray-900 text-gray-200">
  <div class="flex flex-col min-h-screen">

    {{template "views/partials/navbar" .}}
    <div class="container mx-auto px-4 flex-grow" x-data="{ component: 'menu' }">
      <div class="mt-12">
        <div class="flex items-center justify-center text-center pb-2">
          <span class="text-3xl font-semibold text-gray-100">
            <i class="fa-solid fa-music"></i> Text to speech/audio
            <a href="https://localai.io/features/text-to-audio/" target="_blank">
              <i class="fas fa-circle-info pr-2"></i>
            </a>
          </span>
        </div>
        <div class="text-center font-semibold text-gray-100">
          <div class="flex items-center justify-between">

            <div x-show="component === 'menu'" id="menu">
              <button @click="component = 'key'" title="Update API key"
                class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
              >Set API Key🔑</button>
            </div>
            <form x-show="component === 'key'" id="key">
              <input
                type="password"
                id="apiKey"
                name="apiKey"
                placeholder="OpenAI API Key"
                x-model.lazy="key"
              />
              <button @click="component = 'menu'" type="submit" title="Save API key">
                🔒
              </button>
            </form>

            <select x-data="{ link : '' }" x-model="link" x-init="$watch('link', value => window.location = link)"
              class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
            >
              <!-- Options -->
              <option value="" disabled class="text-gray-400">Select a model</option>
              {{ $model := .Model }}
              {{ range .ModelsConfig }}
                {{ if eq .Name $model }}
                  <option value="/tts/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
                {{ else }}
                  <option value="/tts/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
                {{ end }}
              {{ end }}
            </select>

          </div>
        </div>

        <div class="mt-12">
          <input id="tts-model" type="hidden" value="{{.Model}}">
          <form id="tts" action="/tts/{{.Model}}" method="get">
            <input
              type="text"
              id="input"
              name="input"
              placeholder="Prompt…"
              autocomplete="off"
              class="p-2 border rounded w-full bg-gray-600 text-white placeholder-gray-300"
              required
            />
          </form>
          <div class="container max-w-screen-lg mx-auto mt-4 pb-10 flex justify-center">
            <div id="loader" class="my-2 loader"></div>
          </div>
          <div class="container max-w-screen-lg mx-auto mt-4 pb-10 flex justify-center">
            <div id="result" class="mx-auto"></div>
          </div>
        </div>
      </div>
    </div>

    {{template "views/partials/footer" .}}
  </div>
</body>
</html>
core/schema/jina.go (new file, 34 lines)
@@ -0,0 +1,34 @@
package schema

// JINARerankRequest defines the structure of the request payload
type JINARerankRequest struct {
	Model     string   `json:"model"`
	Query     string   `json:"query"`
	Documents []string `json:"documents"`
	TopN      int      `json:"top_n"`
}

// JINADocumentResult represents a single document result
type JINADocumentResult struct {
	Index          int      `json:"index"`
	Document       JINAText `json:"document"`
	RelevanceScore float64  `json:"relevance_score"`
}

// JINAText holds the text of the document
type JINAText struct {
	Text string `json:"text"`
}

// JINARerankResponse defines the structure of the response payload
type JINARerankResponse struct {
	Model   string               `json:"model"`
	Usage   JINAUsageInfo        `json:"usage"`
	Results []JINADocumentResult `json:"results"`
}

// JINAUsageInfo holds information about token usage
type JINAUsageInfo struct {
	TotalTokens  int `json:"total_tokens"`
	PromptTokens int `json:"prompt_tokens"`
}
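A minimal sketch of a request matching these structs, assuming the reranker is mounted JINA-style at /v1/rerank (the route name and the model name are assumptions for this example):

    // Minimal sketch: JINA-style rerank call matching the request/response
    // structs above. The /v1/rerank route and model name are assumptions.
    async function rerank(key, query, documents) {
      const response = await fetch("/v1/rerank", {
        method: "POST",
        headers: {
          Authorization: `Bearer ${key}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: "jina-reranker-v1-base-en", // hypothetical model name
          query: query,
          documents: documents,
          top_n: 3,
        }),
      });
      const json = await response.json(); // JINARerankResponse shape
      // Each result carries the document index, its text, and a relevance score.
      return json.results.map((r) => [r.index, r.relevance_score, r.document.text]);
    }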
core/schema/openai.go
@@ -3,7 +3,7 @@ package schema
 import (
 	"context"

-	"github.com/go-skynet/LocalAI/pkg/grammar"
+	functions "github.com/go-skynet/LocalAI/pkg/functions"
 )

 // APIError provides error information returned by the OpenAI API.
@@ -108,7 +108,7 @@ type ChatCompletionResponseFormat struct {
 type OpenAIRequest struct {
 	PredictionOptions

-	Context context.Context `json:"-"`
+	Context context.Context    `json:"-"`
 	Cancel  context.CancelFunc `json:"-"`

 	// whisper
@@ -130,11 +130,11 @@ type OpenAIRequest struct {
 	Messages []Message `json:"messages" yaml:"messages"`

 	// A list of available functions to call
-	Functions    []grammar.Function `json:"functions" yaml:"functions"`
-	FunctionCall interface{}        `json:"function_call" yaml:"function_call"` // might be a string or an object
+	Functions    functions.Functions `json:"functions" yaml:"functions"`
+	FunctionCall interface{}         `json:"function_call" yaml:"function_call"` // might be a string or an object

-	Tools       []grammar.Tool `json:"tools,omitempty" yaml:"tools"`
-	ToolsChoice interface{}    `json:"tool_choice,omitempty" yaml:"tool_choice"`
+	Tools       []functions.Tool `json:"tools,omitempty" yaml:"tools"`
+	ToolsChoice interface{}      `json:"tool_choice,omitempty" yaml:"tool_choice"`

 	Stream bool `json:"stream"`

@@ -145,7 +145,7 @@ type OpenAIRequest struct {
 	// A grammar to constrain the LLM output
 	Grammar string `json:"grammar" yaml:"grammar"`

-	JSONFunctionGrammarObject *grammar.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
+	JSONFunctionGrammarObject *functions.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`

 	Backend string `json:"backend" yaml:"backend"`
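Since OpenAIRequest carries the (now renamed) Functions/Tools fields, a minimal sketch of a request exercising them through the OpenAI-compatible chat route; the get_weather tool definition is a made-up example:

    // Minimal sketch: drive the tools/tool_choice fields of OpenAIRequest via
    // the OpenAI-compatible chat endpoint. The "get_weather" tool is made up.
    async function chatWithTools(key, model) {
      const response = await fetch("/v1/chat/completions", {
        method: "POST",
        headers: {
          Authorization: `Bearer ${key}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: model,
          messages: [{ role: "user", content: "What's the weather in Rome?" }],
          tools: [
            {
              type: "function",
              function: {
                name: "get_weather",
                parameters: {
                  type: "object",
                  properties: { city: { type: "string" } },
                },
              },
            },
          ],
          tool_choice: "auto",
        }),
      });
      return response.json();
    }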
@@ -10,7 +10,7 @@ type Segment struct {
 	Tokens []int `json:"tokens"`
 }

-type Result struct {
+type TranscriptionResult struct {
 	Segments []Segment `json:"segments"`
 	Text     string    `json:"text"`
 }
core/services/backend_monitor.go
@@ -15,22 +15,22 @@ import (
 	gopsutil "github.com/shirou/gopsutil/v3/process"
 )

-type BackendMonitor struct {
-	configLoader *config.BackendConfigLoader
-	modelLoader  *model.ModelLoader
-	options      *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name.
+type BackendMonitorService struct {
+	backendConfigLoader *config.BackendConfigLoader
+	modelLoader         *model.ModelLoader
+	options             *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name.
 }

-func NewBackendMonitor(configLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, appConfig *config.ApplicationConfig) BackendMonitor {
-	return BackendMonitor{
-		configLoader: configLoader,
-		modelLoader:  modelLoader,
-		options:      appConfig,
+func NewBackendMonitorService(modelLoader *model.ModelLoader, configLoader *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *BackendMonitorService {
+	return &BackendMonitorService{
+		modelLoader:         modelLoader,
+		backendConfigLoader: configLoader,
+		options:             appConfig,
 	}
 }

-func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string, error) {
-	config, exists := bm.configLoader.GetBackendConfig(modelName)
+func (bms BackendMonitorService) getModelLoaderIDFromModelName(modelName string) (string, error) {
+	config, exists := bms.backendConfigLoader.GetBackendConfig(modelName)
 	var backendId string
 	if exists {
 		backendId = config.Model
@@ -46,8 +46,8 @@ func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string
 	return backendId, nil
 }

-func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) {
-	config, exists := bm.configLoader.GetBackendConfig(model)
+func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) {
+	config, exists := bms.backendConfigLoader.GetBackendConfig(model)
 	var backend string
 	if exists {
 		backend = config.Model
@@ -60,7 +60,7 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe
 		backend = fmt.Sprintf("%s.bin", backend)
 	}

-	pid, err := bm.modelLoader.GetGRPCPID(backend)
+	pid, err := bms.modelLoader.GetGRPCPID(backend)

 	if err != nil {
 		log.Error().Err(err).Str("model", model).Msg("failed to find GRPC pid")
@@ -101,12 +101,12 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe
 	}, nil
 }

-func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse, error) {
-	backendId, err := bm.getModelLoaderIDFromModelName(modelName)
+func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.StatusResponse, error) {
+	backendId, err := bms.getModelLoaderIDFromModelName(modelName)
 	if err != nil {
 		return nil, err
 	}
-	modelAddr := bm.modelLoader.CheckIsLoaded(backendId)
+	modelAddr := bms.modelLoader.CheckIsLoaded(backendId)
 	if modelAddr == "" {
 		return nil, fmt.Errorf("backend %s is not currently loaded", backendId)
 	}
@@ -114,7 +114,7 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse
 	status, rpcErr := modelAddr.GRPC(false, nil).Status(context.TODO())
 	if rpcErr != nil {
 		log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
-		val, slbErr := bm.SampleLocalBackendProcess(backendId)
+		val, slbErr := bms.SampleLocalBackendProcess(backendId)
 		if slbErr != nil {
 			return nil, fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error())
 		}
@@ -131,10 +131,10 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse
 	return status, nil
 }

-func (bm BackendMonitor) ShutdownModel(modelName string) error {
-	backendId, err := bm.getModelLoaderIDFromModelName(modelName)
+func (bms BackendMonitorService) ShutdownModel(modelName string) error {
+	backendId, err := bms.getModelLoaderIDFromModelName(modelName)
 	if err != nil {
 		return err
 	}
-	return bm.modelLoader.ShutdownModel(backendId)
+	return bms.modelLoader.ShutdownModel(backendId)
 }
core/services/gallery.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"os"
+	"path/filepath"
 	"strings"
 	"sync"

@@ -84,18 +85,47 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
 		}

 		var err error
-		// if the request contains a gallery name, we apply the gallery from the gallery list
-		if op.GalleryName != "" {
-			if strings.Contains(op.GalleryName, "@") {
-				err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)
-			} else {
-				err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)
-			}
-		} else if op.ConfigURL != "" {
-			startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL)
-			err = cl.Preload(g.modelPath)
-		} else {
-			err = prepareModel(g.modelPath, op.Req, cl, progressCallback)
+
+		// delete a model
+		if op.Delete {
+			modelConfig := &config.BackendConfig{}
+			// Galleryname is the name of the model in this case
+			dat, err := os.ReadFile(filepath.Join(g.modelPath, op.GalleryName+".yaml"))
+			if err != nil {
+				updateError(err)
+				continue
+			}
+			err = yaml.Unmarshal(dat, modelConfig)
+			if err != nil {
+				updateError(err)
+				continue
+			}
+
+			files := []string{}
+			// Remove the model from the config
+			if modelConfig.Model != "" {
+				files = append(files, modelConfig.ModelFileName())
+			}
+
+			if modelConfig.MMProj != "" {
+				files = append(files, modelConfig.MMProjFileName())
+			}
+
+			err = gallery.DeleteModelFromSystem(g.modelPath, op.GalleryName, files)
+		} else {
+			// if the request contains a gallery name, we apply the gallery from the gallery list
+			if op.GalleryName != "" {
+				if strings.Contains(op.GalleryName, "@") {
+					err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)
+				} else {
+					err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)
+				}
+			} else if op.ConfigURL != "" {
+				startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL)
+				err = cl.Preload(g.modelPath)
+			} else {
+				err = prepareModel(g.modelPath, op.Req, cl, progressCallback)
+			}
 		}

 		if err != nil {
@@ -116,7 +146,12 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
 			continue
 		}

-		g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Processed: true, Message: "completed", Progress: 100})
+		g.UpdateStatus(op.Id,
+			&gallery.GalleryOpStatus{
+				Deletion:  op.Delete,
+				Processed: true,
+				Message:   "completed",
+				Progress:  100})
 	}
 }()
Some files were not shown because too many files have changed in this diff.