mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-23 08:10:48 -04:00
Compare commits
153 Commits
gosec_fix
...
fix/closed
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
83110891fd | ||
|
|
2553de0187 | ||
|
|
408dfe62ee | ||
|
|
648ffdf449 | ||
|
|
04c0841ca9 | ||
|
|
43144c4743 | ||
|
|
a778668bcd | ||
|
|
4b131a7090 | ||
|
|
d06a052d54 | ||
|
|
b5115903bf | ||
|
|
afaff175d0 | ||
|
|
4686877c6d | ||
|
|
e5586e8781 | ||
|
|
3acd767ac4 | ||
|
|
5488fc3bc1 | ||
|
|
0965c6cd68 | ||
|
|
db704199dc | ||
|
|
2cc3b7128e | ||
|
|
88b99d30bb | ||
|
|
307a835199 | ||
|
|
f84b55d1ef | ||
|
|
139209353f | ||
|
|
a30058b80f | ||
|
|
53f406dc35 | ||
|
|
2649407f44 | ||
|
|
0a8f627cce | ||
|
|
76d4e88e0c | ||
|
|
d4d2a76f8f | ||
|
|
7d306c6431 | ||
|
|
44bdacac61 | ||
|
|
6bd6e2bdeb | ||
|
|
2908ff3f6b | ||
|
|
f19277b8e2 | ||
|
|
32de75c683 | ||
|
|
164a9e972f | ||
|
|
d747f2c89b | ||
|
|
58662db48e | ||
|
|
078942fc9f | ||
|
|
6dfee99575 | ||
|
|
ad62156d54 | ||
|
|
1689740269 | ||
|
|
50a3b54e34 | ||
|
|
e94a50e9db | ||
|
|
4e0f3cc980 | ||
|
|
2a8cbad122 | ||
|
|
453c45d022 | ||
|
|
4550abbfce | ||
|
|
f2ba1cfb01 | ||
|
|
8c4196faf3 | ||
|
|
b0f4556c0f | ||
|
|
fa5c98549a | ||
|
|
3d12d2037c | ||
|
|
d6522e69ca | ||
|
|
ef1507d000 | ||
|
|
a3d69872e3 | ||
|
|
33b2d38dd0 | ||
|
|
74408bdc77 | ||
|
|
8c4f720fb5 | ||
|
|
8002ad27cb | ||
|
|
1b8a77433a | ||
|
|
a370a11115 | ||
|
|
aa87eff283 | ||
|
|
0d784f46e5 | ||
|
|
c54cfd3609 | ||
|
|
6555994060 | ||
|
|
0893d3cbbe | ||
|
|
90cacb9692 | ||
|
|
69d2902b0a | ||
|
|
c1752cbb83 | ||
|
|
b8e129f2a6 | ||
|
|
cc6fac1688 | ||
|
|
043cb94436 | ||
|
|
bbdf78615e | ||
|
|
e332ff8066 | ||
|
|
26d99ed1c7 | ||
|
|
1da8d8b9db | ||
|
|
bf8f8671d1 | ||
|
|
51cba89682 | ||
|
|
3e8e71f8b6 | ||
|
|
4edd8c80b4 | ||
|
|
fd70a22196 | ||
|
|
56f4deb938 | ||
|
|
9bd7f3f995 | ||
|
|
ee21b00a8d | ||
|
|
1f43678d53 | ||
|
|
20c0e128c0 | ||
|
|
5c3d1d81e6 | ||
|
|
c22b3187a7 | ||
|
|
54f2657870 | ||
|
|
cef7f8a014 | ||
|
|
bf8e50a11d | ||
|
|
6c6cd8bbe0 | ||
|
|
00d6c2a966 | ||
|
|
415cf31aa3 | ||
|
|
f55053bfba | ||
|
|
e24654ada0 | ||
|
|
c4cecba07f | ||
|
|
38cad0b8dc | ||
|
|
052af98dcd | ||
|
|
56d8f5163c | ||
|
|
b6af4f4467 | ||
|
|
a5b08f43ff | ||
|
|
c15f506fd5 | ||
|
|
a2a63460e9 | ||
|
|
2fcea486eb | ||
|
|
5c9d26e39b | ||
|
|
191bc2e50a | ||
|
|
fbb9facda4 | ||
|
|
c6a819e92f | ||
|
|
a50cde69a2 | ||
|
|
e5bd74878e | ||
|
|
dc98b2ea44 | ||
|
|
acf119828f | ||
|
|
a53392f919 | ||
|
|
eee1fb2c75 | ||
|
|
8826ca93b3 | ||
|
|
5049629381 | ||
|
|
92136a5d34 | ||
|
|
075e5015c0 | ||
|
|
46fd4ff6db | ||
|
|
4a4e44bf55 | ||
|
|
22247ad92c | ||
|
|
d0f2bf3181 | ||
|
|
0e4e101101 | ||
|
|
f4b1bd8f6d | ||
|
|
e95cb8eaac | ||
|
|
db1159b651 | ||
|
|
a9a3a07c3b | ||
|
|
06c8339862 | ||
|
|
2394f7833f | ||
|
|
36e19928eb | ||
|
|
abc27e0dc4 | ||
|
|
42d6b9e0cc | ||
|
|
c866b77586 | ||
|
|
5356b81b7f | ||
|
|
30fe163100 | ||
|
|
afb5bbc1b8 | ||
|
|
12a8d0e46f | ||
|
|
09c7d8d458 | ||
|
|
149cc1eb13 | ||
|
|
a5ce987bdb | ||
|
|
2edc732c33 | ||
|
|
fec01d9e69 | ||
|
|
9ca5ef339a | ||
|
|
a8003f2b7c | ||
|
|
25deb4ba95 | ||
|
|
3d3db1d74f | ||
|
|
cabb1602e8 | ||
|
|
25e7661de2 | ||
|
|
cbfab81c35 | ||
|
|
925315ab5c | ||
|
|
5213e79f5c | ||
|
|
7fe6d0ad2b |
@@ -9,6 +9,7 @@
|
|||||||
# Param 2: email
|
# Param 2: email
|
||||||
#
|
#
|
||||||
config_user() {
|
config_user() {
|
||||||
|
echo "Configuring git for $1 <$2>"
|
||||||
local gcn=$(git config --global user.name)
|
local gcn=$(git config --global user.name)
|
||||||
if [ -z "${gcn}" ]; then
|
if [ -z "${gcn}" ]; then
|
||||||
echo "Setting up git user / remote"
|
echo "Setting up git user / remote"
|
||||||
@@ -24,6 +25,7 @@ config_user() {
|
|||||||
# Param 2: remote url
|
# Param 2: remote url
|
||||||
#
|
#
|
||||||
config_remote() {
|
config_remote() {
|
||||||
|
echo "Adding git remote and fetching $2 as $1"
|
||||||
local gr=$(git remote -v | grep $1)
|
local gr=$(git remote -v | grep $1)
|
||||||
if [ -z "${gr}" ]; then
|
if [ -z "${gr}" ]; then
|
||||||
git remote add $1 $2
|
git remote add $1 $2
|
||||||
|
|||||||
11
.github/check_and_update.py
vendored
11
.github/check_and_update.py
vendored
@@ -29,9 +29,14 @@ def calculate_sha256(file_path):
|
|||||||
def manual_safety_check_hf(repo_id):
|
def manual_safety_check_hf(repo_id):
|
||||||
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
|
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
|
||||||
scan = scanResponse.json()
|
scan = scanResponse.json()
|
||||||
if scan['hasUnsafeFile']:
|
# Check if 'hasUnsafeFile' exists in the response
|
||||||
return scan
|
if 'hasUnsafeFile' in scan:
|
||||||
return None
|
if scan['hasUnsafeFile']:
|
||||||
|
return scan
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
download_type, repo_id_or_url = parse_uri(uri)
|
download_type, repo_id_or_url = parse_uri(uri)
|
||||||
|
|
||||||
|
|||||||
117
.github/workflows/image.yml
vendored
117
.github/workflows/image.yml
vendored
@@ -13,6 +13,78 @@ concurrency:
|
|||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
hipblas-jobs:
|
||||||
|
uses: ./.github/workflows/image_build.yml
|
||||||
|
with:
|
||||||
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
image-type: ${{ matrix.image-type }}
|
||||||
|
build-type: ${{ matrix.build-type }}
|
||||||
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
|
platforms: ${{ matrix.platforms }}
|
||||||
|
runs-on: ${{ matrix.runs-on }}
|
||||||
|
base-image: ${{ matrix.base-image }}
|
||||||
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
|
aio: ${{ matrix.aio }}
|
||||||
|
makeflags: ${{ matrix.makeflags }}
|
||||||
|
latest-image: ${{ matrix.latest-image }}
|
||||||
|
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||||
|
secrets:
|
||||||
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
|
strategy:
|
||||||
|
# Pushing with all jobs in parallel
|
||||||
|
# eats the bandwidth of all the nodes
|
||||||
|
max-parallel: 2
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-hipblas-ffmpeg'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
aio: "-aio-gpu-hipblas"
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
latest-image: 'latest-gpu-hipblas'
|
||||||
|
latest-image-aio: 'latest-aio-gpu-hipblas'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas'
|
||||||
|
ffmpeg: 'false'
|
||||||
|
image-type: 'extras'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas-core'
|
||||||
|
ffmpeg: 'false'
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
self-hosted-jobs:
|
self-hosted-jobs:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
with:
|
with:
|
||||||
@@ -39,7 +111,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
# Pushing with all jobs in parallel
|
# Pushing with all jobs in parallel
|
||||||
# eats the bandwidth of all the nodes
|
# eats the bandwidth of all the nodes
|
||||||
max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
|
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
# Extra images
|
# Extra images
|
||||||
@@ -122,29 +194,6 @@ jobs:
|
|||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'auto'
|
|
||||||
tag-suffix: '-hipblas-ffmpeg'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
image-type: 'extras'
|
|
||||||
aio: "-aio-gpu-hipblas"
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
latest-image: 'latest-gpu-hipblas'
|
|
||||||
latest-image-aio: 'latest-aio-gpu-hipblas'
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas'
|
|
||||||
ffmpeg: 'false'
|
|
||||||
image-type: 'extras'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
@@ -212,26 +261,6 @@ jobs:
|
|||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas-ffmpeg-core'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
image-type: 'core'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas-core'
|
|
||||||
ffmpeg: 'false'
|
|
||||||
image-type: 'core'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
|
|
||||||
core-image-build:
|
core-image-build:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
|
|||||||
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
- name: Run Gosec Security Scanner
|
- name: Run Gosec Security Scanner
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
uses: securego/gosec@v2.21.0
|
uses: securego/gosec@v2.21.4
|
||||||
with:
|
with:
|
||||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
# we let the report trigger content trigger a failure using the GitHub Security features.
|
||||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||||
|
|||||||
11
.github/workflows/test.yml
vendored
11
.github/workflows/test.yml
vendored
@@ -178,13 +178,22 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
# Install protoc
|
||||||
|
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||||
|
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||||
|
rm protoc.zip
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||||
- name: Build images
|
- name: Build images
|
||||||
run: |
|
run: |
|
||||||
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
||||||
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
||||||
make run-e2e-aio
|
make run-e2e-aio
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
|
|||||||
@@ -15,8 +15,6 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
|
|||||||
- [Documentation](#documentation)
|
- [Documentation](#documentation)
|
||||||
- [Community and Communication](#community-and-communication)
|
- [Community and Communication](#community-and-communication)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Getting Started
|
## Getting Started
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
@@ -54,7 +52,7 @@ If you find a bug, have a feature request, or encounter any issues, please check
|
|||||||
|
|
||||||
## Coding Guidelines
|
## Coding Guidelines
|
||||||
|
|
||||||
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here.
|
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here.
|
||||||
|
|
||||||
## Testing
|
## Testing
|
||||||
|
|
||||||
@@ -84,5 +82,3 @@ We are welcome the contribution of the documents, please open new PR or create a
|
|||||||
- You can reach out via the Github issue tracker.
|
- You can reach out via the Github issue tracker.
|
||||||
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
|
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
|
||||||
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
|
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
|
||||||
|
|
||||||
---
|
|
||||||
|
|||||||
13
Dockerfile
13
Dockerfile
@@ -297,10 +297,10 @@ COPY .git .
|
|||||||
RUN make prepare
|
RUN make prepare
|
||||||
|
|
||||||
## Build the binary
|
## Build the binary
|
||||||
## If it's CUDA, we want to skip some of the llama-compat backends to save space
|
## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
|
||||||
## We only leave the most CPU-optimized variant and the fallback for the cublas build
|
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
|
||||||
## (both will use CUDA for the actual computation)
|
## (both will use CUDA or hipblas for the actual computation)
|
||||||
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
||||||
else \
|
else \
|
||||||
make build; \
|
make build; \
|
||||||
@@ -338,9 +338,8 @@ RUN if [ "${FFMPEG}" = "true" ]; then \
|
|||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
ssh less && \
|
ssh less wget
|
||||||
apt-get clean && \
|
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
RUN go install github.com/go-delve/delve/cmd/dlv@latest
|
RUN go install github.com/go-delve/delve/cmd/dlv@latest
|
||||||
|
|
||||||
|
|||||||
9
Makefile
9
Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
|
|||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
CPPLLAMA_VERSION?=e6b7801bd189d102d901d3e72035611a25456ef1
|
CPPLLAMA_VERSION?=d5ed2b929d85bbd7dbeecb690880f07d9d7a6077
|
||||||
|
|
||||||
# go-rwkv version
|
# go-rwkv version
|
||||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||||
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
|||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=a551933542d956ae84634937acd2942eb40efaaf
|
WHISPER_CPP_VERSION?=ccc2547210e09e3a1785817383ab770389bb442b
|
||||||
|
|
||||||
# bert.cpp version
|
# bert.cpp version
|
||||||
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||||
@@ -359,6 +359,9 @@ clean-tests:
|
|||||||
rm -rf test-dir
|
rm -rf test-dir
|
||||||
rm -rf core/http/backend-assets
|
rm -rf core/http/backend-assets
|
||||||
|
|
||||||
|
clean-dc: clean
|
||||||
|
cp -r /build/backend-assets /workspace/backend-assets
|
||||||
|
|
||||||
## Build:
|
## Build:
|
||||||
build: prepare backend-assets grpcs ## Build the project
|
build: prepare backend-assets grpcs ## Build the project
|
||||||
$(info ${GREEN}I local-ai build info:${RESET})
|
$(info ${GREEN}I local-ai build info:${RESET})
|
||||||
@@ -465,7 +468,7 @@ run-e2e-image:
|
|||||||
ls -liah $(abspath ./tests/e2e-fixtures)
|
ls -liah $(abspath ./tests/e2e-fixtures)
|
||||||
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
||||||
|
|
||||||
run-e2e-aio:
|
run-e2e-aio: protogen-go
|
||||||
@echo 'Running e2e AIO tests'
|
@echo 'Running e2e AIO tests'
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
|
||||||
|
|
||||||
|
|||||||
10
README.md
10
README.md
@@ -68,9 +68,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
|||||||
|
|
||||||
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
||||||
|
|
||||||
## 🔥🔥 Hot topics / Roadmap
|
## 📰 Latest project news
|
||||||
|
|
||||||
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
|
||||||
|
|
||||||
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
||||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
||||||
@@ -83,8 +81,12 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
|||||||
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
||||||
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
||||||
|
|
||||||
Hot topics (looking for contributors):
|
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
|
|
||||||
|
## 🔥🔥 Hot topics (looking for help):
|
||||||
|
|
||||||
|
- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
|
||||||
|
- Realtime API https://github.com/mudler/LocalAI/issues/3714
|
||||||
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
||||||
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
||||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ backend: llama-cpp
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
name: gpt-4-vision-preview
|
name: gpt-4o
|
||||||
|
|
||||||
roles:
|
roles:
|
||||||
user: "USER:"
|
user: "USER:"
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ backend: llama-cpp
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
name: gpt-4-vision-preview
|
name: gpt-4o
|
||||||
|
|
||||||
roles:
|
roles:
|
||||||
user: "USER:"
|
user: "USER:"
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ backend: llama-cpp
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
mmap: false
|
mmap: false
|
||||||
f16: false
|
f16: false
|
||||||
name: gpt-4-vision-preview
|
name: gpt-4o
|
||||||
|
|
||||||
roles:
|
roles:
|
||||||
user: "USER:"
|
user: "USER:"
|
||||||
|
|||||||
@@ -26,6 +26,19 @@ service Backend {
|
|||||||
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
|
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
|
||||||
|
|
||||||
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
||||||
|
|
||||||
|
rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Define the empty request
|
||||||
|
message MetricsRequest {}
|
||||||
|
|
||||||
|
message MetricsResponse {
|
||||||
|
int32 slot_id = 1;
|
||||||
|
string prompt_json_for_slot = 2; // Stores the prompt as a JSON string.
|
||||||
|
float tokens_per_second = 3;
|
||||||
|
int32 tokens_generated = 4;
|
||||||
|
int32 prompt_tokens_processed = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
message RerankRequest {
|
message RerankRequest {
|
||||||
@@ -134,6 +147,9 @@ message PredictOptions {
|
|||||||
repeated string Images = 42;
|
repeated string Images = 42;
|
||||||
bool UseTokenizerTemplate = 43;
|
bool UseTokenizerTemplate = 43;
|
||||||
repeated Message Messages = 44;
|
repeated Message Messages = 44;
|
||||||
|
repeated string Videos = 45;
|
||||||
|
repeated string Audios = 46;
|
||||||
|
string CorrelationId = 47;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The response message containing the result
|
// The response message containing the result
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include "clip.h"
|
#include "clip.h"
|
||||||
#include "llava.h"
|
#include "llava.h"
|
||||||
|
#include "log.h"
|
||||||
#include "stb_image.h"
|
#include "stb_image.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "json.hpp"
|
#include "json.hpp"
|
||||||
@@ -448,7 +449,7 @@ struct llama_server_context
|
|||||||
LOG_INFO("Multi Modal Mode Enabled", {});
|
LOG_INFO("Multi Modal Mode Enabled", {});
|
||||||
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
|
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
|
||||||
if(clp_ctx == nullptr) {
|
if(clp_ctx == nullptr) {
|
||||||
LOG_ERROR("unable to load clip model", {{"model", params.mmproj}});
|
LOG_ERR("unable to load clip model: %s", params.mmproj.c_str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -462,7 +463,7 @@ struct llama_server_context
|
|||||||
ctx = llama_init.context;
|
ctx = llama_init.context;
|
||||||
if (model == nullptr)
|
if (model == nullptr)
|
||||||
{
|
{
|
||||||
LOG_ERROR("unable to load model", {{"model", params.model}});
|
LOG_ERR("unable to load model: %s", params.model.c_str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -470,7 +471,7 @@ struct llama_server_context
|
|||||||
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
||||||
const int n_embd_llm = llama_n_embd(model);
|
const int n_embd_llm = llama_n_embd(model);
|
||||||
if (n_embd_clip != n_embd_llm) {
|
if (n_embd_clip != n_embd_llm) {
|
||||||
LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
llama_free_model(model);
|
llama_free_model(model);
|
||||||
return false;
|
return false;
|
||||||
@@ -489,11 +490,21 @@ struct llama_server_context
|
|||||||
std::vector<char> buf(1);
|
std::vector<char> buf(1);
|
||||||
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
|
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
|
||||||
if (res < 0) {
|
if (res < 0) {
|
||||||
LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
|
LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__);
|
||||||
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
|
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
llama_client_slot* get_active_slot() {
|
||||||
|
for (llama_client_slot& slot : slots) {
|
||||||
|
// Check if the slot is currently processing
|
||||||
|
if (slot.is_processing()) {
|
||||||
|
return &slot; // Return the active slot
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nullptr; // No active slot found
|
||||||
|
}
|
||||||
|
|
||||||
void initialize() {
|
void initialize() {
|
||||||
// create slots
|
// create slots
|
||||||
all_slots_are_idle = true;
|
all_slots_are_idle = true;
|
||||||
@@ -812,10 +823,11 @@ struct llama_server_context
|
|||||||
img_sl.img_data = clip_image_u8_init();
|
img_sl.img_data = clip_image_u8_init();
|
||||||
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
|
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
|
||||||
{
|
{
|
||||||
LOG_ERROR("failed to load image", {
|
LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
|
||||||
{"slot_id", slot->id},
|
__func__,
|
||||||
{"img_sl_id", img_sl.id}
|
slot->id,
|
||||||
});
|
img_sl.id
|
||||||
|
);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
LOG_VERBOSE("image loaded", {
|
LOG_VERBOSE("image loaded", {
|
||||||
@@ -853,12 +865,12 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!found) {
|
if (!found) {
|
||||||
LOG_TEE("ERROR: Image with id: %i, not found.\n", img_id);
|
LOG("ERROR: Image with id: %i, not found.\n", img_id);
|
||||||
slot->images.clear();
|
slot->images.clear();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} catch (const std::invalid_argument& e) {
|
} catch (const std::invalid_argument& e) {
|
||||||
LOG_TEE("Invalid image number id in prompt\n");
|
LOG("Invalid image number id in prompt\n");
|
||||||
slot->images.clear();
|
slot->images.clear();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -886,7 +898,7 @@ struct llama_server_context
|
|||||||
{"task_id", slot->task_id},
|
{"task_id", slot->task_id},
|
||||||
});
|
});
|
||||||
|
|
||||||
// LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
|
// LOG("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -926,7 +938,7 @@ struct llama_server_context
|
|||||||
};
|
};
|
||||||
if (llama_decode(ctx, batch_view) != 0)
|
if (llama_decode(ctx, batch_view) != 0)
|
||||||
{
|
{
|
||||||
LOG_TEE("%s: llama_decode() failed\n", __func__);
|
LOG("%s: llama_decode() failed\n", __func__);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -938,7 +950,7 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_TEE("system prompt updated\n");
|
LOG("system prompt updated\n");
|
||||||
system_need_update = false;
|
system_need_update = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1120,7 +1132,7 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
|
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
|
||||||
LOG_TEE("Error processing the given image");
|
LOG("Error processing the given image");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1132,7 +1144,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
void send_error(task_server& task, const std::string &error)
|
void send_error(task_server& task, const std::string &error)
|
||||||
{
|
{
|
||||||
LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
|
LOG("task %i - error: %s\n", task.id, error.c_str());
|
||||||
task_result res;
|
task_result res;
|
||||||
res.id = task.id;
|
res.id = task.id;
|
||||||
res.multitask_id = task.multitask_id;
|
res.multitask_id = task.multitask_id;
|
||||||
@@ -1371,7 +1383,7 @@ struct llama_server_context
|
|||||||
};
|
};
|
||||||
if (llama_decode(ctx, batch_view))
|
if (llama_decode(ctx, batch_view))
|
||||||
{
|
{
|
||||||
LOG_TEE("%s : failed to eval\n", __func__);
|
LOG("%s : failed to eval\n", __func__);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1389,7 +1401,7 @@ struct llama_server_context
|
|||||||
llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
|
llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
|
||||||
if (llama_decode(ctx, batch_img))
|
if (llama_decode(ctx, batch_img))
|
||||||
{
|
{
|
||||||
LOG_TEE("%s : failed to eval image\n", __func__);
|
LOG("%s : failed to eval image\n", __func__);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
slot.n_past += n_eval;
|
slot.n_past += n_eval;
|
||||||
@@ -1572,7 +1584,7 @@ struct llama_server_context
|
|||||||
slot.n_past = 0;
|
slot.n_past = 0;
|
||||||
slot.truncated = false;
|
slot.truncated = false;
|
||||||
slot.has_next_token = true;
|
slot.has_next_token = true;
|
||||||
LOG_TEE("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
// END LOCALAI changes
|
// END LOCALAI changes
|
||||||
@@ -1820,10 +1832,11 @@ struct llama_server_context
|
|||||||
|
|
||||||
if (has_images && !ingest_images(slot, n_batch))
|
if (has_images && !ingest_images(slot, n_batch))
|
||||||
{
|
{
|
||||||
LOG_ERROR("failed processing images", {
|
LOG_ERR("%s: failed processing images Slot id : %d, Task id: %d",
|
||||||
"slot_id", slot.id,
|
__func__,
|
||||||
"task_id", slot.task_id,
|
slot.id,
|
||||||
});
|
slot.task_id
|
||||||
|
);
|
||||||
// FIXME @phymbert: to be properly tested
|
// FIXME @phymbert: to be properly tested
|
||||||
// early returning without changing the slot state will block the slot for ever
|
// early returning without changing the slot state will block the slot for ever
|
||||||
// no one at the moment is checking the return value
|
// no one at the moment is checking the return value
|
||||||
@@ -1863,10 +1876,10 @@ struct llama_server_context
|
|||||||
const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
|
const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
|
||||||
const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;
|
const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;
|
||||||
|
|
||||||
LOG_TEE("\n");
|
LOG("\n");
|
||||||
LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
|
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
|
||||||
LOG_TEE("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
|
LOG("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
|
||||||
LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
|
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
|
||||||
|
|
||||||
llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
|
llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
|
||||||
llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n);
|
llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n);
|
||||||
@@ -1876,7 +1889,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
slot.ga_i += slot.ga_w / slot.ga_n;
|
slot.ga_i += slot.ga_w / slot.ga_n;
|
||||||
|
|
||||||
LOG_TEE("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
|
LOG("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
|
||||||
}
|
}
|
||||||
slot.n_past_se += n_tokens;
|
slot.n_past_se += n_tokens;
|
||||||
}
|
}
|
||||||
@@ -1901,11 +1914,11 @@ struct llama_server_context
|
|||||||
if (n_batch == 1 || ret < 0)
|
if (n_batch == 1 || ret < 0)
|
||||||
{
|
{
|
||||||
// if you get here, it means the KV cache is full - try increasing it via the context size
|
// if you get here, it means the KV cache is full - try increasing it via the context size
|
||||||
LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
|
LOG("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
|
LOG("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
|
||||||
|
|
||||||
// retry with half the batch size to try to find a free slot in the KV cache
|
// retry with half the batch size to try to find a free slot in the KV cache
|
||||||
n_batch /= 2;
|
n_batch /= 2;
|
||||||
@@ -2103,6 +2116,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
data["ignore_eos"] = predict->ignoreeos();
|
data["ignore_eos"] = predict->ignoreeos();
|
||||||
data["embeddings"] = predict->embeddings();
|
data["embeddings"] = predict->embeddings();
|
||||||
|
|
||||||
|
// Add the correlationid to json data
|
||||||
|
data["correlation_id"] = predict->correlationid();
|
||||||
|
|
||||||
// for each image in the request, add the image data
|
// for each image in the request, add the image data
|
||||||
//
|
//
|
||||||
for (int i = 0; i < predict->images_size(); i++) {
|
for (int i = 0; i < predict->images_size(); i++) {
|
||||||
@@ -2341,6 +2357,11 @@ public:
|
|||||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||||
reply.set_prompt_tokens(tokens_evaluated);
|
reply.set_prompt_tokens(tokens_evaluated);
|
||||||
|
|
||||||
|
// Log Request Correlation Id
|
||||||
|
LOG_VERBOSE("correlation:", {
|
||||||
|
{ "id", data["correlation_id"] }
|
||||||
|
});
|
||||||
|
|
||||||
// Send the reply
|
// Send the reply
|
||||||
writer->Write(reply);
|
writer->Write(reply);
|
||||||
|
|
||||||
@@ -2364,6 +2385,12 @@ public:
|
|||||||
std::string completion_text;
|
std::string completion_text;
|
||||||
task_result result = llama.queue_results.recv(task_id);
|
task_result result = llama.queue_results.recv(task_id);
|
||||||
if (!result.error && result.stop) {
|
if (!result.error && result.stop) {
|
||||||
|
|
||||||
|
// Log Request Correlation Id
|
||||||
|
LOG_VERBOSE("correlation:", {
|
||||||
|
{ "id", data["correlation_id"] }
|
||||||
|
});
|
||||||
|
|
||||||
completion_text = result.result_json.value("content", "");
|
completion_text = result.result_json.value("content", "");
|
||||||
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
||||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||||
@@ -2403,6 +2430,31 @@ public:
|
|||||||
|
|
||||||
return grpc::Status::OK;
|
return grpc::Status::OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// gRPC handler reporting generation metrics for the slot returned by
// llama.get_active_slot().
//
// When a slot is active, the response carries its id, the JSON dump of its
// prompt, the number of decoded tokens, the number of processed prompt tokens
// and a tokens-per-second figure. When no slot is active, every numeric field
// is set to 0 and the prompt to an empty string.
//
// `context` and `request` are not read. Always returns grpc::Status::OK.
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
    llama_client_slot* active_slot = llama.get_active_slot();

    if (active_slot == nullptr) {
        // No active slot: report an all-zero / empty snapshot.
        response->set_slot_id(0);
        response->set_prompt_json_for_slot("");
        response->set_tokens_per_second(0);
        response->set_tokens_generated(0);
        response->set_prompt_tokens_processed(0);
        return grpc::Status::OK;
    }

    // Same formula as before, but only evaluated when the elapsed generation
    // time is positive. Dividing by a zero t_token_generation would put
    // inf/NaN into the response (or be a division by zero if the field is an
    // integer type), so report 0 instead, as in the no-active-slot case.
    // NOTE(review): the 1e3 factor suggests t_token_generation is in
    // milliseconds - confirm against the slot definition.
    double tokens_per_second = 0.0;
    if (active_slot->t_token_generation > 0) {
        tokens_per_second = 1e3 / active_slot->t_token_generation * active_slot->n_decoded;
    }

    // Populate the response with the slot's metrics.
    response->set_slot_id(active_slot->id);
    response->set_prompt_json_for_slot(active_slot->prompt.dump());
    response->set_tokens_per_second(tokens_per_second);
    response->set_tokens_generated(active_slot->n_decoded);
    response->set_prompt_tokens_processed(active_slot->num_prompt_tokens_processed);

    return grpc::Status::OK;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void RunServer(const std::string& server_address) {
|
void RunServer(const std::string& server_address) {
|
||||||
|
|||||||
@@ -2,4 +2,4 @@
|
|||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch
|
||||||
torch
|
torch
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
accelerate
|
accelerate
|
||||||
auto-gptq==0.7.1
|
auto-gptq==0.7.1
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
transformers
|
transformers
|
||||||
@@ -3,6 +3,6 @@ intel-extension-for-pytorch
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
bark==0.1.5
|
bark==0.1.5
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -1,2 +1,2 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
@@ -3,6 +3,6 @@ intel-extension-for-pytorch
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
TTS==0.22.0
|
coqui-tts
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -3,7 +3,7 @@ intel-extension-for-pytorch
|
|||||||
torch
|
torch
|
||||||
torchvision
|
torchvision
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
diffusers
|
diffusers
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers
|
transformers
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
setuptools
|
setuptools
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
pillow
|
pillow
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
wheel
|
wheel
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch
|
||||||
torch
|
torch
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
librosa==0.9.1
|
librosa==0.9.1
|
||||||
faster-whisper==1.0.3
|
faster-whisper==1.0.3
|
||||||
@@ -18,6 +18,6 @@ python-dotenv
|
|||||||
pypinyin==0.50.0
|
pypinyin==0.50.0
|
||||||
cn2an==0.5.22
|
cn2an==0.5.22
|
||||||
jieba==0.42.1
|
jieba==0.42.1
|
||||||
gradio==4.38.1
|
gradio==4.44.1
|
||||||
langid==1.1.6
|
langid==1.1.6
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
git+https://github.com/myshell-ai/MeloTTS.git
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
librosa
|
librosa
|
||||||
faster-whisper
|
faster-whisper
|
||||||
|
|||||||
@@ -15,5 +15,12 @@ installRequirements
|
|||||||
|
|
||||||
# https://github.com/descriptinc/audiotools/issues/101
|
# https://github.com/descriptinc/audiotools/issues/101
|
||||||
# incompatible protobuf versions.
|
# incompatible protobuf versions.
|
||||||
PYDIR=$(ls ${MY_DIR}/venv/lib)
|
PYDIR=python3.10
|
||||||
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/builder.py
|
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
|
||||||
|
|
||||||
|
if [ ! -d ${pyenv} ]; then
|
||||||
|
echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
|
||||||
|
|||||||
@@ -3,6 +3,6 @@ intel-extension-for-pytorch
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
llvmlite==0.43.0
|
llvmlite==0.43.0
|
||||||
@@ -5,4 +5,4 @@ accelerate
|
|||||||
torch
|
torch
|
||||||
rerankers[transformers]
|
rerankers[transformers]
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -55,7 +55,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
"""
|
"""
|
||||||
model_name = request.Model
|
model_name = request.Model
|
||||||
try:
|
try:
|
||||||
self.model = SentenceTransformer(model_name)
|
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
|
|
||||||
|
|||||||
@@ -2,5 +2,5 @@ torch
|
|||||||
accelerate
|
accelerate
|
||||||
transformers
|
transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
sentence-transformers==3.0.1
|
sentence-transformers==3.1.1
|
||||||
transformers
|
transformers
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
torch
|
torch
|
||||||
accelerate
|
accelerate
|
||||||
sentence-transformers==3.0.1
|
sentence-transformers==3.1.1
|
||||||
transformers
|
transformers
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
torch
|
torch
|
||||||
accelerate
|
accelerate
|
||||||
sentence-transformers==3.0.1
|
sentence-transformers==3.1.1
|
||||||
transformers
|
transformers
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
torch
|
torch
|
||||||
accelerate
|
accelerate
|
||||||
sentence-transformers==3.0.1
|
sentence-transformers==3.1.1
|
||||||
transformers
|
transformers
|
||||||
@@ -4,5 +4,5 @@ torch
|
|||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
accelerate
|
accelerate
|
||||||
sentence-transformers==3.0.1
|
sentence-transformers==3.1.1
|
||||||
transformers
|
transformers
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
|
datasets
|
||||||
|
einops
|
||||||
@@ -4,4 +4,4 @@ transformers
|
|||||||
accelerate
|
accelerate
|
||||||
torch
|
torch
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
scipy==1.14.0
|
scipy==1.14.0
|
||||||
certifi
|
certifi
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -4,4 +4,4 @@ accelerate
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -5,6 +5,8 @@ import argparse
|
|||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
from typing import List
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
import backend_pb2
|
import backend_pb2
|
||||||
import backend_pb2_grpc
|
import backend_pb2_grpc
|
||||||
@@ -15,6 +17,8 @@ from vllm.engine.async_llm_engine import AsyncLLMEngine
|
|||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
from vllm.utils import random_uuid
|
from vllm.utils import random_uuid
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||||
|
from vllm.multimodal.utils import fetch_image
|
||||||
|
from vllm.assets.video import VideoAsset
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
|
|
||||||
@@ -105,6 +109,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
try:
|
try:
|
||||||
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
|
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
|
print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -117,7 +122,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
)
|
)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
|
print("Model loaded successfully", file=sys.stderr)
|
||||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||||
|
|
||||||
async def Predict(self, request, context):
|
async def Predict(self, request, context):
|
||||||
@@ -196,15 +201,33 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
if request.Seed != 0:
|
if request.Seed != 0:
|
||||||
sampling_params.seed = request.Seed
|
sampling_params.seed = request.Seed
|
||||||
|
|
||||||
|
# Extract image paths and process images
|
||||||
prompt = request.Prompt
|
prompt = request.Prompt
|
||||||
|
|
||||||
# If tokenizer template is enabled and messages are provided instead of prompt apply the tokenizer template
|
image_paths = request.Images
|
||||||
|
image_data = [self.load_image(img_path) for img_path in image_paths]
|
||||||
|
|
||||||
|
videos_path = request.Videos
|
||||||
|
video_data = [self.load_video(video_path) for video_path in videos_path]
|
||||||
|
|
||||||
|
# If tokenizer template is enabled and messages are provided instead of prompt, apply the tokenizer template
|
||||||
if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
|
if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
|
||||||
prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)
|
prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)
|
||||||
|
|
||||||
# Generate text
|
# Generate text using the LLM engine
|
||||||
request_id = random_uuid()
|
request_id = random_uuid()
|
||||||
outputs = self.llm.generate(prompt, sampling_params, request_id)
|
print(f"Generating text with request_id: {request_id}", file=sys.stderr)
|
||||||
|
outputs = self.llm.generate(
|
||||||
|
{
|
||||||
|
"prompt": prompt,
|
||||||
|
"multi_modal_data": {
|
||||||
|
"image": image_data if image_data else None,
|
||||||
|
"video": video_data if video_data else None,
|
||||||
|
} if image_data or video_data else None,
|
||||||
|
},
|
||||||
|
sampling_params=sampling_params,
|
||||||
|
request_id=request_id,
|
||||||
|
)
|
||||||
|
|
||||||
# Stream the results
|
# Stream the results
|
||||||
generated_text = ""
|
generated_text = ""
|
||||||
@@ -227,9 +250,49 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
if streaming:
|
if streaming:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Remove the image files from /tmp folder
|
||||||
|
for img_path in image_paths:
|
||||||
|
try:
|
||||||
|
os.remove(img_path)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)
|
||||||
|
|
||||||
# Sending the final generated text
|
# Sending the final generated text
|
||||||
yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
|
yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
|
||||||
|
|
||||||
|
def load_image(self, image_path: str):
    """
    Open the image stored at ``image_path``.

    Args:
        image_path (str): Location of the image file.

    Returns:
        The object returned by ``Image.open`` on success. If opening raises,
        the error is printed to stderr and the result of
        ``self.load_video(image_path)`` is returned instead.
    """
    try:
        loaded = Image.open(image_path)
    except Exception as err:
        print(f"Error loading image {image_path}: {err}", file=sys.stderr)
        # Fall back to the video loader for the same path.
        loaded = self.load_video(image_path)
    return loaded
|
||||||
|
|
||||||
|
def load_video(self, video_path: str):
    """
    Load a video from the given file path.

    Args:
        video_path (str): The path to the video file.

    Returns:
        The ``np_ndarrays`` value of a ``VideoAsset`` built from
        ``video_path``, or ``None`` if loading raised (the error is printed
        to stderr).
    """
    try:
        return VideoAsset(name=video_path).np_ndarrays
    except Exception as e:
        # Deliberately best-effort: report the failure and return None.
        # The message must use video_path; the previous text referenced
        # image_path, which does not exist in this function, so the handler
        # itself raised NameError instead of returning None.
        print(f"Error loading video {video_path}: {e}", file=sys.stderr)
        return None
|
||||||
|
|
||||||
async def serve(address):
|
async def serve(address):
|
||||||
# Start asyncio gRPC server
|
# Start asyncio gRPC server
|
||||||
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||||
|
|||||||
@@ -13,4 +13,18 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
|||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
installRequirements
|
if [ "x${BUILD_TYPE}" == "x" ]; then
|
||||||
|
ensureVenv
|
||||||
|
# https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html
|
||||||
|
if [ ! -d vllm ]; then
|
||||||
|
git clone https://github.com/vllm-project/vllm
|
||||||
|
fi
|
||||||
|
pushd vllm
|
||||||
|
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.66.2 protobuf bitsandbytes
|
||||||
|
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
||||||
|
VLLM_TARGET_DEVICE=cpu python setup.py install
|
||||||
|
popd
|
||||||
|
rm -rf vllm
|
||||||
|
else
|
||||||
|
installRequirements
|
||||||
|
fi
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
accelerate
|
accelerate
|
||||||
torch
|
torch
|
||||||
transformers
|
transformers
|
||||||
|
bitsandbytes
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
accelerate
|
accelerate
|
||||||
torch
|
torch
|
||||||
transformers
|
transformers
|
||||||
|
bitsandbytes
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
accelerate
|
accelerate
|
||||||
torch
|
torch
|
||||||
transformers
|
transformers
|
||||||
|
bitsandbytes
|
||||||
@@ -4,4 +4,5 @@ accelerate
|
|||||||
torch
|
torch
|
||||||
transformers
|
transformers
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
|
bitsandbytes
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools
|
setuptools
|
||||||
@@ -10,20 +10,11 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
|
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
|
||||||
modelFile := backendConfig.Model
|
|
||||||
|
|
||||||
grpcOpts := gRPCModelOpts(backendConfig)
|
|
||||||
|
|
||||||
var inferenceModel interface{}
|
var inferenceModel interface{}
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
opts := modelOpts(backendConfig, appConfig, []model.Option{
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{})
|
||||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
|
||||||
model.WithThreads(uint32(*backendConfig.Threads)),
|
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
|
||||||
model.WithModel(modelFile),
|
|
||||||
model.WithContext(appConfig.Context),
|
|
||||||
})
|
|
||||||
|
|
||||||
if backendConfig.Backend == "" {
|
if backendConfig.Backend == "" {
|
||||||
inferenceModel, err = loader.GreedyLoader(opts...)
|
inferenceModel, err = loader.GreedyLoader(opts...)
|
||||||
|
|||||||
@@ -8,19 +8,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
|
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
|
||||||
threads := backendConfig.Threads
|
|
||||||
if *threads == 0 && appConfig.Threads != 0 {
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{})
|
||||||
threads = &appConfig.Threads
|
|
||||||
}
|
|
||||||
gRPCOpts := gRPCModelOpts(backendConfig)
|
|
||||||
opts := modelOpts(backendConfig, appConfig, []model.Option{
|
|
||||||
model.WithBackendString(backendConfig.Backend),
|
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
|
||||||
model.WithThreads(uint32(*threads)),
|
|
||||||
model.WithContext(appConfig.Context),
|
|
||||||
model.WithModel(backendConfig.Model),
|
|
||||||
model.WithLoadGRPCLoadModelOpts(gRPCOpts),
|
|
||||||
})
|
|
||||||
|
|
||||||
inferenceModel, err := loader.BackendLoader(
|
inferenceModel, err := loader.BackendLoader(
|
||||||
opts...,
|
opts...,
|
||||||
|
|||||||
@@ -31,24 +31,13 @@ type TokenUsage struct {
|
|||||||
Completion int
|
Completion int
|
||||||
}
|
}
|
||||||
|
|
||||||
func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||||
modelFile := c.Model
|
modelFile := c.Model
|
||||||
threads := c.Threads
|
|
||||||
if *threads == 0 && o.Threads != 0 {
|
|
||||||
threads = &o.Threads
|
|
||||||
}
|
|
||||||
grpcOpts := gRPCModelOpts(c)
|
|
||||||
|
|
||||||
var inferenceModel grpc.Backend
|
var inferenceModel grpc.Backend
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
opts := modelOpts(c, o, []model.Option{
|
opts := ModelOptions(c, o, []model.Option{})
|
||||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
|
||||||
model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup
|
|
||||||
model.WithAssetDir(o.AssetsDestination),
|
|
||||||
model.WithModel(modelFile),
|
|
||||||
model.WithContext(o.Context),
|
|
||||||
})
|
|
||||||
|
|
||||||
if c.Backend != "" {
|
if c.Backend != "" {
|
||||||
opts = append(opts, model.WithBackendString(c.Backend))
|
opts = append(opts, model.WithBackendString(c.Backend))
|
||||||
@@ -101,6 +90,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
|||||||
opts.Messages = protoMessages
|
opts.Messages = protoMessages
|
||||||
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
|
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
|
||||||
opts.Images = images
|
opts.Images = images
|
||||||
|
opts.Videos = videos
|
||||||
|
opts.Audios = audios
|
||||||
|
|
||||||
tokenUsage := TokenUsage{}
|
tokenUsage := TokenUsage{}
|
||||||
|
|
||||||
|
|||||||
@@ -11,32 +11,65 @@ import (
|
|||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
|
func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
|
||||||
|
name := c.Name
|
||||||
|
if name == "" {
|
||||||
|
name = c.Model
|
||||||
|
}
|
||||||
|
|
||||||
|
defOpts := []model.Option{
|
||||||
|
model.WithBackendString(c.Backend),
|
||||||
|
model.WithModel(c.Model),
|
||||||
|
model.WithAssetDir(so.AssetsDestination),
|
||||||
|
model.WithContext(so.Context),
|
||||||
|
model.WithModelID(name),
|
||||||
|
}
|
||||||
|
|
||||||
|
threads := 1
|
||||||
|
|
||||||
|
if c.Threads != nil {
|
||||||
|
threads = *c.Threads
|
||||||
|
}
|
||||||
|
|
||||||
|
if so.Threads != 0 {
|
||||||
|
threads = so.Threads
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Threads = &threads
|
||||||
|
|
||||||
|
grpcOpts := grpcModelOpts(c)
|
||||||
|
defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts))
|
||||||
|
|
||||||
if so.SingleBackend {
|
if so.SingleBackend {
|
||||||
opts = append(opts, model.WithSingleActiveBackend())
|
defOpts = append(defOpts, model.WithSingleActiveBackend())
|
||||||
}
|
}
|
||||||
|
|
||||||
if so.ParallelBackendRequests {
|
if so.ParallelBackendRequests {
|
||||||
opts = append(opts, model.EnableParallelRequests)
|
defOpts = append(defOpts, model.EnableParallelRequests)
|
||||||
}
|
}
|
||||||
|
|
||||||
if c.GRPC.Attempts != 0 {
|
if c.GRPC.Attempts != 0 {
|
||||||
opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
|
defOpts = append(defOpts, model.WithGRPCAttempts(c.GRPC.Attempts))
|
||||||
}
|
}
|
||||||
|
|
||||||
if c.GRPC.AttemptsSleepTime != 0 {
|
if c.GRPC.AttemptsSleepTime != 0 {
|
||||||
opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
|
defOpts = append(defOpts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
|
||||||
}
|
}
|
||||||
|
|
||||||
for k, v := range so.ExternalGRPCBackends {
|
for k, v := range so.ExternalGRPCBackends {
|
||||||
opts = append(opts, model.WithExternalBackend(k, v))
|
defOpts = append(defOpts, model.WithExternalBackend(k, v))
|
||||||
}
|
}
|
||||||
|
|
||||||
return opts
|
return append(defOpts, opts...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getSeed(c config.BackendConfig) int32 {
|
func getSeed(c config.BackendConfig) int32 {
|
||||||
seed := int32(*c.Seed)
|
var seed int32 = config.RAND_SEED
|
||||||
|
|
||||||
|
if c.Seed != nil {
|
||||||
|
seed = int32(*c.Seed)
|
||||||
|
}
|
||||||
|
|
||||||
if seed == config.RAND_SEED {
|
if seed == config.RAND_SEED {
|
||||||
seed = rand.Int31()
|
seed = rand.Int31()
|
||||||
}
|
}
|
||||||
@@ -44,11 +77,47 @@ func getSeed(c config.BackendConfig) int32 {
|
|||||||
return seed
|
return seed
|
||||||
}
|
}
|
||||||
|
|
||||||
func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||||
b := 512
|
b := 512
|
||||||
if c.Batch != 0 {
|
if c.Batch != 0 {
|
||||||
b = c.Batch
|
b = c.Batch
|
||||||
}
|
}
|
||||||
|
|
||||||
|
f16 := false
|
||||||
|
if c.F16 != nil {
|
||||||
|
f16 = *c.F16
|
||||||
|
}
|
||||||
|
|
||||||
|
embeddings := false
|
||||||
|
if c.Embeddings != nil {
|
||||||
|
embeddings = *c.Embeddings
|
||||||
|
}
|
||||||
|
|
||||||
|
lowVRAM := false
|
||||||
|
if c.LowVRAM != nil {
|
||||||
|
lowVRAM = *c.LowVRAM
|
||||||
|
}
|
||||||
|
|
||||||
|
mmap := false
|
||||||
|
if c.MMap != nil {
|
||||||
|
mmap = *c.MMap
|
||||||
|
}
|
||||||
|
|
||||||
|
ctxSize := 1024
|
||||||
|
if c.ContextSize != nil {
|
||||||
|
ctxSize = *c.ContextSize
|
||||||
|
}
|
||||||
|
|
||||||
|
mmlock := false
|
||||||
|
if c.MMlock != nil {
|
||||||
|
mmlock = *c.MMlock
|
||||||
|
}
|
||||||
|
|
||||||
|
nGPULayers := 9999999
|
||||||
|
if c.NGPULayers != nil {
|
||||||
|
nGPULayers = *c.NGPULayers
|
||||||
|
}
|
||||||
|
|
||||||
return &pb.ModelOptions{
|
return &pb.ModelOptions{
|
||||||
CUDA: c.CUDA || c.Diffusers.CUDA,
|
CUDA: c.CUDA || c.Diffusers.CUDA,
|
||||||
SchedulerType: c.Diffusers.SchedulerType,
|
SchedulerType: c.Diffusers.SchedulerType,
|
||||||
@@ -56,14 +125,14 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
|||||||
CFGScale: c.Diffusers.CFGScale,
|
CFGScale: c.Diffusers.CFGScale,
|
||||||
LoraAdapter: c.LoraAdapter,
|
LoraAdapter: c.LoraAdapter,
|
||||||
LoraScale: c.LoraScale,
|
LoraScale: c.LoraScale,
|
||||||
F16Memory: *c.F16,
|
F16Memory: f16,
|
||||||
LoraBase: c.LoraBase,
|
LoraBase: c.LoraBase,
|
||||||
IMG2IMG: c.Diffusers.IMG2IMG,
|
IMG2IMG: c.Diffusers.IMG2IMG,
|
||||||
CLIPModel: c.Diffusers.ClipModel,
|
CLIPModel: c.Diffusers.ClipModel,
|
||||||
CLIPSubfolder: c.Diffusers.ClipSubFolder,
|
CLIPSubfolder: c.Diffusers.ClipSubFolder,
|
||||||
CLIPSkip: int32(c.Diffusers.ClipSkip),
|
CLIPSkip: int32(c.Diffusers.ClipSkip),
|
||||||
ControlNet: c.Diffusers.ControlNet,
|
ControlNet: c.Diffusers.ControlNet,
|
||||||
ContextSize: int32(*c.ContextSize),
|
ContextSize: int32(ctxSize),
|
||||||
Seed: getSeed(c),
|
Seed: getSeed(c),
|
||||||
NBatch: int32(b),
|
NBatch: int32(b),
|
||||||
NoMulMatQ: c.NoMulMatQ,
|
NoMulMatQ: c.NoMulMatQ,
|
||||||
@@ -85,16 +154,16 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
|||||||
YarnBetaSlow: c.YarnBetaSlow,
|
YarnBetaSlow: c.YarnBetaSlow,
|
||||||
NGQA: c.NGQA,
|
NGQA: c.NGQA,
|
||||||
RMSNormEps: c.RMSNormEps,
|
RMSNormEps: c.RMSNormEps,
|
||||||
MLock: *c.MMlock,
|
MLock: mmlock,
|
||||||
RopeFreqBase: c.RopeFreqBase,
|
RopeFreqBase: c.RopeFreqBase,
|
||||||
RopeScaling: c.RopeScaling,
|
RopeScaling: c.RopeScaling,
|
||||||
Type: c.ModelType,
|
Type: c.ModelType,
|
||||||
RopeFreqScale: c.RopeFreqScale,
|
RopeFreqScale: c.RopeFreqScale,
|
||||||
NUMA: c.NUMA,
|
NUMA: c.NUMA,
|
||||||
Embeddings: *c.Embeddings,
|
Embeddings: embeddings,
|
||||||
LowVRAM: *c.LowVRAM,
|
LowVRAM: lowVRAM,
|
||||||
NGPULayers: int32(*c.NGPULayers),
|
NGPULayers: int32(nGPULayers),
|
||||||
MMap: *c.MMap,
|
MMap: mmap,
|
||||||
MainGPU: c.MainGPU,
|
MainGPU: c.MainGPU,
|
||||||
Threads: int32(*c.Threads),
|
Threads: int32(*c.Threads),
|
||||||
TensorSplit: c.TensorSplit,
|
TensorSplit: c.TensorSplit,
|
||||||
|
|||||||
@@ -9,21 +9,9 @@ import (
|
|||||||
model "github.com/mudler/LocalAI/pkg/model"
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
||||||
bb := backend
|
|
||||||
if bb == "" {
|
|
||||||
return nil, fmt.Errorf("backend is required")
|
|
||||||
}
|
|
||||||
|
|
||||||
grpcOpts := gRPCModelOpts(backendConfig)
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
|
||||||
|
|
||||||
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
|
||||||
model.WithBackendString(bb),
|
|
||||||
model.WithModel(modelFile),
|
|
||||||
model.WithContext(appConfig.Context),
|
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
|
||||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
|
||||||
})
|
|
||||||
rerankModel, err := loader.BackendLoader(opts...)
|
rerankModel, err := loader.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func SoundGeneration(
|
func SoundGeneration(
|
||||||
backend string,
|
|
||||||
modelFile string,
|
modelFile string,
|
||||||
text string,
|
text string,
|
||||||
duration *float32,
|
duration *float32,
|
||||||
@@ -25,18 +24,8 @@ func SoundGeneration(
|
|||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig,
|
||||||
backendConfig config.BackendConfig,
|
backendConfig config.BackendConfig,
|
||||||
) (string, *proto.Result, error) {
|
) (string, *proto.Result, error) {
|
||||||
if backend == "" {
|
|
||||||
return "", nil, fmt.Errorf("backend is a required parameter")
|
|
||||||
}
|
|
||||||
|
|
||||||
grpcOpts := gRPCModelOpts(backendConfig)
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
|
||||||
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
|
||||||
model.WithBackendString(backend),
|
|
||||||
model.WithModel(modelFile),
|
|
||||||
model.WithContext(appConfig.Context),
|
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
|
||||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
|
||||||
})
|
|
||||||
|
|
||||||
soundGenModel, err := loader.BackendLoader(opts...)
|
soundGenModel, err := loader.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
33
core/backend/token_metrics.go
Normal file
33
core/backend/token_metrics.go
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
package backend
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TokenMetrics(
|
||||||
|
modelFile string,
|
||||||
|
loader *model.ModelLoader,
|
||||||
|
appConfig *config.ApplicationConfig,
|
||||||
|
backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {
|
||||||
|
|
||||||
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{
|
||||||
|
model.WithModel(modelFile),
|
||||||
|
})
|
||||||
|
model, err := loader.BackendLoader(opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if model == nil {
|
||||||
|
return nil, fmt.Errorf("could not loadmodel model")
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := model.GetTokenMetrics(context.Background(), &proto.MetricsRequest{})
|
||||||
|
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
44
core/backend/tokenize.go
Normal file
44
core/backend/tokenize.go
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
package backend
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc"
|
||||||
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
|
||||||
|
|
||||||
|
modelFile := backendConfig.Model
|
||||||
|
|
||||||
|
var inferenceModel grpc.Backend
|
||||||
|
var err error
|
||||||
|
|
||||||
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{
|
||||||
|
model.WithModel(modelFile),
|
||||||
|
})
|
||||||
|
|
||||||
|
if backendConfig.Backend == "" {
|
||||||
|
inferenceModel, err = loader.GreedyLoader(opts...)
|
||||||
|
} else {
|
||||||
|
opts = append(opts, model.WithBackendString(backendConfig.Backend))
|
||||||
|
inferenceModel, err = loader.BackendLoader(opts...)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return schema.TokenizeResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
predictOptions := gRPCPredictOpts(backendConfig, loader.ModelPath)
|
||||||
|
predictOptions.Prompt = s
|
||||||
|
|
||||||
|
// tokenize the string
|
||||||
|
resp, err := inferenceModel.TokenizeString(appConfig.Context, predictOptions)
|
||||||
|
if err != nil {
|
||||||
|
return schema.TokenizeResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return schema.TokenizeResponse{
|
||||||
|
Tokens: resp.Tokens,
|
||||||
|
}, nil
|
||||||
|
|
||||||
|
}
|
||||||
@@ -14,13 +14,11 @@ import (
|
|||||||
|
|
||||||
func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
|
func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
|
||||||
|
|
||||||
opts := modelOpts(backendConfig, appConfig, []model.Option{
|
if backendConfig.Backend == "" {
|
||||||
model.WithBackendString(model.WhisperBackend),
|
backendConfig.Backend = model.WhisperBackend
|
||||||
model.WithModel(backendConfig.Model),
|
}
|
||||||
model.WithContext(appConfig.Context),
|
|
||||||
model.WithThreads(uint32(*backendConfig.Threads)),
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{})
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
|
||||||
})
|
|
||||||
|
|
||||||
transcriptionModel, err := ml.BackendLoader(opts...)
|
transcriptionModel, err := ml.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -28,14 +28,9 @@ func ModelTTS(
|
|||||||
bb = model.PiperBackend
|
bb = model.PiperBackend
|
||||||
}
|
}
|
||||||
|
|
||||||
grpcOpts := gRPCModelOpts(backendConfig)
|
opts := ModelOptions(config.BackendConfig{}, appConfig, []model.Option{
|
||||||
|
|
||||||
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
|
||||||
model.WithBackendString(bb),
|
model.WithBackendString(bb),
|
||||||
model.WithModel(modelFile),
|
model.WithModel(modelFile),
|
||||||
model.WithContext(appConfig.Context),
|
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
|
||||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
|
||||||
})
|
})
|
||||||
ttsModel, err := loader.BackendLoader(opts...)
|
ttsModel, err := loader.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -41,31 +41,35 @@ type RunCMD struct {
|
|||||||
Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
|
Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
|
||||||
ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"`
|
ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"`
|
||||||
|
|
||||||
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
|
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
|
||||||
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
|
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
|
||||||
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
|
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
|
||||||
LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
|
LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
|
||||||
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
|
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
|
||||||
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
|
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
|
||||||
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
|
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
|
||||||
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
|
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
|
||||||
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
|
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
|
||||||
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
|
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
|
||||||
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
|
UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"`
|
||||||
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
|
DisableApiKeyRequirementForHttpGet bool `env:"LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET" default:"false" help:"If true, a valid API key is not required to issue GET requests to portions of the web ui. This should only be enabled in secure testing environments" group:"hardening"`
|
||||||
Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
|
HttpGetExemptedEndpoints []string `env:"LOCALAI_HTTP_GET_EXEMPTED_ENDPOINTS" default:"^/$,^/browse/?$,^/talk/?$,^/p2p/?$,^/chat/?$,^/text2image/?$,^/tts/?$,^/static/.*$,^/swagger.*$" help:"If LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET is overriden to true, this is the list of endpoints to exempt. Only adjust this in case of a security incident or as a result of a personal security posture review" group:"hardening"`
|
||||||
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
|
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
|
||||||
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
|
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
|
||||||
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
|
Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
|
||||||
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
|
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
|
||||||
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
|
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
|
||||||
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
|
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
|
||||||
EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"`
|
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
|
||||||
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
|
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
|
||||||
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
|
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
|
||||||
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
|
EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"`
|
||||||
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
|
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
|
||||||
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
|
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
|
||||||
|
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
|
||||||
|
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
|
||||||
|
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
|
||||||
|
LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||||
@@ -97,7 +101,11 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
|||||||
config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
|
config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
|
||||||
config.WithOpaqueErrors(r.OpaqueErrors),
|
config.WithOpaqueErrors(r.OpaqueErrors),
|
||||||
config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan),
|
config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan),
|
||||||
|
config.WithSubtleKeyComparison(r.UseSubtleKeyComparison),
|
||||||
|
config.WithDisableApiKeyRequirementForHttpGet(r.DisableApiKeyRequirementForHttpGet),
|
||||||
|
config.WithHttpGetExemptedEndpoints(r.HttpGetExemptedEndpoints),
|
||||||
config.WithP2PNetworkID(r.Peer2PeerNetworkID),
|
config.WithP2PNetworkID(r.Peer2PeerNetworkID),
|
||||||
|
config.WithLoadToMemory(r.LoadToMemory),
|
||||||
}
|
}
|
||||||
|
|
||||||
token := ""
|
token := ""
|
||||||
|
|||||||
@@ -85,13 +85,14 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
|
|||||||
|
|
||||||
options := config.BackendConfig{}
|
options := config.BackendConfig{}
|
||||||
options.SetDefaults()
|
options.SetDefaults()
|
||||||
|
options.Backend = t.Backend
|
||||||
|
|
||||||
var inputFile *string
|
var inputFile *string
|
||||||
if t.InputFile != "" {
|
if t.InputFile != "" {
|
||||||
inputFile = &t.InputFile
|
inputFile = &t.InputFile
|
||||||
}
|
}
|
||||||
|
|
||||||
filePath, _, err := backend.SoundGeneration(t.Backend, t.Model, text,
|
filePath, _, err := backend.SoundGeneration(t.Model, text,
|
||||||
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
|
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
|
||||||
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
|
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,9 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type UtilCMD struct {
|
type UtilCMD struct {
|
||||||
GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
|
GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
|
||||||
HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"`
|
HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"`
|
||||||
|
UsecaseHeuristic UsecaseHeuristicCMD `cmd:"" name:"usecase-heuristic" help:"Checks a specific model config and prints what usecase LocalAI will offer for it."`
|
||||||
}
|
}
|
||||||
|
|
||||||
type GGUFInfoCMD struct {
|
type GGUFInfoCMD struct {
|
||||||
@@ -30,6 +31,11 @@ type HFScanCMD struct {
|
|||||||
ToScan []string `arg:""`
|
ToScan []string `arg:""`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type UsecaseHeuristicCMD struct {
|
||||||
|
ConfigName string `name:"The config file to check"`
|
||||||
|
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
|
||||||
|
}
|
||||||
|
|
||||||
func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
|
func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
|
||||||
if u.Args == nil || len(u.Args) == 0 {
|
if u.Args == nil || len(u.Args) == 0 {
|
||||||
return fmt.Errorf("no GGUF file provided")
|
return fmt.Errorf("no GGUF file provided")
|
||||||
@@ -99,3 +105,31 @@ func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (uhcmd *UsecaseHeuristicCMD) Run(ctx *cliContext.Context) error {
|
||||||
|
if len(uhcmd.ConfigName) == 0 {
|
||||||
|
log.Error().Msg("ConfigName is a required parameter")
|
||||||
|
return fmt.Errorf("config name is a required parameter")
|
||||||
|
}
|
||||||
|
if len(uhcmd.ModelsPath) == 0 {
|
||||||
|
log.Error().Msg("ModelsPath is a required parameter")
|
||||||
|
return fmt.Errorf("model path is a required parameter")
|
||||||
|
}
|
||||||
|
bcl := config.NewBackendConfigLoader(uhcmd.ModelsPath)
|
||||||
|
err := bcl.LoadBackendConfig(uhcmd.ConfigName)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Str("ConfigName", uhcmd.ConfigName).Msg("error while loading backend")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
bc, exists := bcl.GetBackendConfig(uhcmd.ConfigName)
|
||||||
|
if !exists {
|
||||||
|
log.Error().Str("ConfigName", uhcmd.ConfigName).Msg("ConfigName not found")
|
||||||
|
}
|
||||||
|
for name, uc := range config.GetAllBackendConfigUsecases() {
|
||||||
|
if bc.HasUsecases(uc) {
|
||||||
|
log.Info().Str("Usecase", name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.Info().Msg("---")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"embed"
|
"embed"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"regexp"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/xsysinfo"
|
"github.com/mudler/LocalAI/pkg/xsysinfo"
|
||||||
@@ -16,7 +17,6 @@ type ApplicationConfig struct {
|
|||||||
ModelPath string
|
ModelPath string
|
||||||
LibPath string
|
LibPath string
|
||||||
UploadLimitMB, Threads, ContextSize int
|
UploadLimitMB, Threads, ContextSize int
|
||||||
DisableWebUI bool
|
|
||||||
F16 bool
|
F16 bool
|
||||||
Debug bool
|
Debug bool
|
||||||
ImageDir string
|
ImageDir string
|
||||||
@@ -31,11 +31,18 @@ type ApplicationConfig struct {
|
|||||||
PreloadModelsFromPath string
|
PreloadModelsFromPath string
|
||||||
CORSAllowOrigins string
|
CORSAllowOrigins string
|
||||||
ApiKeys []string
|
ApiKeys []string
|
||||||
EnforcePredownloadScans bool
|
|
||||||
OpaqueErrors bool
|
|
||||||
P2PToken string
|
P2PToken string
|
||||||
P2PNetworkID string
|
P2PNetworkID string
|
||||||
|
|
||||||
|
DisableWebUI bool
|
||||||
|
EnforcePredownloadScans bool
|
||||||
|
OpaqueErrors bool
|
||||||
|
UseSubtleKeyComparison bool
|
||||||
|
DisableApiKeyRequirementForHttpGet bool
|
||||||
|
HttpGetExemptedEndpoints []*regexp.Regexp
|
||||||
|
DisableGalleryEndpoint bool
|
||||||
|
LoadToMemory []string
|
||||||
|
|
||||||
ModelLibraryURL string
|
ModelLibraryURL string
|
||||||
|
|
||||||
Galleries []Gallery
|
Galleries []Gallery
|
||||||
@@ -57,8 +64,6 @@ type ApplicationConfig struct {
|
|||||||
ModelsURL []string
|
ModelsURL []string
|
||||||
|
|
||||||
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
|
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
|
||||||
|
|
||||||
DisableGalleryEndpoint bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type AppOption func(*ApplicationConfig)
|
type AppOption func(*ApplicationConfig)
|
||||||
@@ -327,6 +332,38 @@ func WithOpaqueErrors(opaque bool) AppOption {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func WithLoadToMemory(models []string) AppOption {
|
||||||
|
return func(o *ApplicationConfig) {
|
||||||
|
o.LoadToMemory = models
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func WithSubtleKeyComparison(subtle bool) AppOption {
|
||||||
|
return func(o *ApplicationConfig) {
|
||||||
|
o.UseSubtleKeyComparison = subtle
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func WithDisableApiKeyRequirementForHttpGet(required bool) AppOption {
|
||||||
|
return func(o *ApplicationConfig) {
|
||||||
|
o.DisableApiKeyRequirementForHttpGet = required
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func WithHttpGetExemptedEndpoints(endpoints []string) AppOption {
|
||||||
|
return func(o *ApplicationConfig) {
|
||||||
|
o.HttpGetExemptedEndpoints = []*regexp.Regexp{}
|
||||||
|
for _, epr := range endpoints {
|
||||||
|
r, err := regexp.Compile(epr)
|
||||||
|
if err == nil && r != nil {
|
||||||
|
o.HttpGetExemptedEndpoints = append(o.HttpGetExemptedEndpoints, r)
|
||||||
|
} else {
|
||||||
|
log.Warn().Err(err).Str("regex", epr).Msg("Error while compiling HTTP Get Exemption regex, skipping this entry.")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ToConfigLoaderOptions returns a slice of ConfigLoader Option.
|
// ToConfigLoaderOptions returns a slice of ConfigLoader Option.
|
||||||
// Some options defined at the application level are going to be passed as defaults for
|
// Some options defined at the application level are going to be passed as defaults for
|
||||||
// all the configuration for the models.
|
// all the configuration for the models.
|
||||||
|
|||||||
@@ -3,11 +3,13 @@ package config
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/pkg/downloader"
|
"github.com/mudler/LocalAI/pkg/downloader"
|
||||||
"github.com/mudler/LocalAI/pkg/functions"
|
"github.com/mudler/LocalAI/pkg/functions"
|
||||||
|
"gopkg.in/yaml.v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -27,13 +29,15 @@ type BackendConfig struct {
|
|||||||
schema.PredictionOptions `yaml:"parameters"`
|
schema.PredictionOptions `yaml:"parameters"`
|
||||||
Name string `yaml:"name"`
|
Name string `yaml:"name"`
|
||||||
|
|
||||||
F16 *bool `yaml:"f16"`
|
F16 *bool `yaml:"f16"`
|
||||||
Threads *int `yaml:"threads"`
|
Threads *int `yaml:"threads"`
|
||||||
Debug *bool `yaml:"debug"`
|
Debug *bool `yaml:"debug"`
|
||||||
Roles map[string]string `yaml:"roles"`
|
Roles map[string]string `yaml:"roles"`
|
||||||
Embeddings *bool `yaml:"embeddings"`
|
Embeddings *bool `yaml:"embeddings"`
|
||||||
Backend string `yaml:"backend"`
|
Backend string `yaml:"backend"`
|
||||||
TemplateConfig TemplateConfig `yaml:"template"`
|
TemplateConfig TemplateConfig `yaml:"template"`
|
||||||
|
KnownUsecaseStrings []string `yaml:"known_usecases"`
|
||||||
|
KnownUsecases *BackendConfigUsecases `yaml:"-"`
|
||||||
|
|
||||||
PromptStrings, InputStrings []string `yaml:"-"`
|
PromptStrings, InputStrings []string `yaml:"-"`
|
||||||
InputToken [][]int `yaml:"-"`
|
InputToken [][]int `yaml:"-"`
|
||||||
@@ -192,6 +196,21 @@ type TemplateConfig struct {
|
|||||||
// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
|
// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
|
||||||
// It defaults to \n
|
// It defaults to \n
|
||||||
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
|
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
|
||||||
|
|
||||||
|
Video string `yaml:"video"`
|
||||||
|
Image string `yaml:"image"`
|
||||||
|
Audio string `yaml:"audio"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
|
||||||
|
type BCAlias BackendConfig
|
||||||
|
var aux BCAlias
|
||||||
|
if err := value.Decode(&aux); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
*c = BackendConfig(aux)
|
||||||
|
c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *BackendConfig) SetFunctionCallString(s string) {
|
func (c *BackendConfig) SetFunctionCallString(s string) {
|
||||||
@@ -410,3 +429,121 @@ func (c *BackendConfig) Validate() bool {
|
|||||||
func (c *BackendConfig) HasTemplate() bool {
|
func (c *BackendConfig) HasTemplate() bool {
|
||||||
return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != ""
|
return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BackendConfigUsecases is a bit set describing which usecases a backend
// configuration can serve. Individual usecases occupy one bit each and can be
// combined with the bitwise OR operator.
type BackendConfigUsecases int

const (
	// FLAG_ANY has no bits set; it places no requirement on a configuration.
	FLAG_ANY              BackendConfigUsecases = 0b000000000
	FLAG_CHAT             BackendConfigUsecases = 0b000000001
	FLAG_COMPLETION       BackendConfigUsecases = 0b000000010
	FLAG_EDIT             BackendConfigUsecases = 0b000000100
	FLAG_EMBEDDINGS       BackendConfigUsecases = 0b000001000
	FLAG_RERANK           BackendConfigUsecases = 0b000010000
	FLAG_IMAGE            BackendConfigUsecases = 0b000100000
	FLAG_TRANSCRIPT       BackendConfigUsecases = 0b001000000
	FLAG_TTS              BackendConfigUsecases = 0b010000000
	FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000

	// Common Subsets
	//
	// FLAG_LLM is the union of the text-generation usecases. The flags are
	// disjoint single bits, so they must be combined with OR: combining them
	// with AND evaluates to 0, which is indistinguishable from FLAG_ANY.
	FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
)
|
||||||
|
|
||||||
|
func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
|
||||||
|
return map[string]BackendConfigUsecases{
|
||||||
|
"FLAG_ANY": FLAG_ANY,
|
||||||
|
"FLAG_CHAT": FLAG_CHAT,
|
||||||
|
"FLAG_COMPLETION": FLAG_COMPLETION,
|
||||||
|
"FLAG_EDIT": FLAG_EDIT,
|
||||||
|
"FLAG_EMBEDDINGS": FLAG_EMBEDDINGS,
|
||||||
|
"FLAG_RERANK": FLAG_RERANK,
|
||||||
|
"FLAG_IMAGE": FLAG_IMAGE,
|
||||||
|
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
|
||||||
|
"FLAG_TTS": FLAG_TTS,
|
||||||
|
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
|
||||||
|
"FLAG_LLM": FLAG_LLM,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetUsecasesFromYAML(input []string) *BackendConfigUsecases {
|
||||||
|
if len(input) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
result := FLAG_ANY
|
||||||
|
flags := GetAllBackendConfigUsecases()
|
||||||
|
for _, str := range input {
|
||||||
|
flag, exists := flags["FLAG_"+strings.ToUpper(str)]
|
||||||
|
if exists {
|
||||||
|
result |= flag
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return &result
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasUsecases examines a BackendConfig and determines which endpoints have a chance of success.
|
||||||
|
func (c *BackendConfig) HasUsecases(u BackendConfigUsecases) bool {
|
||||||
|
if (c.KnownUsecases != nil) && ((u & *c.KnownUsecases) == u) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return c.GuessUsecases(u)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GuessUsecases is a **heuristic based** function, as the backend in question may not be loaded yet, and the config may not record what it's useful at.
|
||||||
|
// In its current state, this function should ideally check for properties of the config like templates, rather than the direct backend name checks for the lower half.
|
||||||
|
// This avoids the maintenance burden of updating this list for each new backend - but unfortunately, that's the best option for some services currently.
|
||||||
|
func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
|
||||||
|
if (u & FLAG_CHAT) == FLAG_CHAT {
|
||||||
|
if c.TemplateConfig.Chat == "" && c.TemplateConfig.ChatMessage == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (u & FLAG_COMPLETION) == FLAG_COMPLETION {
|
||||||
|
if c.TemplateConfig.Completion == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (u & FLAG_EDIT) == FLAG_EDIT {
|
||||||
|
if c.TemplateConfig.Edit == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (u & FLAG_EMBEDDINGS) == FLAG_EMBEDDINGS {
|
||||||
|
if c.Embeddings == nil || !*c.Embeddings {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (u & FLAG_IMAGE) == FLAG_IMAGE {
|
||||||
|
imageBackends := []string{"diffusers", "tinydream", "stablediffusion"}
|
||||||
|
if !slices.Contains(imageBackends, c.Backend) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
if (u & FLAG_RERANK) == FLAG_RERANK {
|
||||||
|
if c.Backend != "rerankers" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (u & FLAG_TRANSCRIPT) == FLAG_TRANSCRIPT {
|
||||||
|
if c.Backend != "whisper" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (u & FLAG_TTS) == FLAG_TTS {
|
||||||
|
ttsBackends := []string{"piper", "transformers-musicgen", "parler-tts"}
|
||||||
|
if !slices.Contains(ttsBackends, c.Backend) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION {
|
||||||
|
if c.Backend != "transformers-musicgen" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|||||||
35
core/config/backend_config_filter.go
Normal file
35
core/config/backend_config_filter.go
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import "regexp"
|
||||||
|
|
||||||
|
type BackendConfigFilterFn func(string, *BackendConfig) bool
|
||||||
|
|
||||||
|
func NoFilterFn(_ string, _ *BackendConfig) bool { return true }
|
||||||
|
|
||||||
|
func BuildNameFilterFn(filter string) (BackendConfigFilterFn, error) {
|
||||||
|
if filter == "" {
|
||||||
|
return NoFilterFn, nil
|
||||||
|
}
|
||||||
|
rxp, err := regexp.Compile(filter)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return func(name string, config *BackendConfig) bool {
|
||||||
|
if config != nil {
|
||||||
|
return rxp.MatchString(config.Name)
|
||||||
|
}
|
||||||
|
return rxp.MatchString(name)
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func BuildUsecaseFilterFn(usecases BackendConfigUsecases) BackendConfigFilterFn {
|
||||||
|
if usecases == FLAG_ANY {
|
||||||
|
return NoFilterFn
|
||||||
|
}
|
||||||
|
return func(name string, config *BackendConfig) bool {
|
||||||
|
if config == nil {
|
||||||
|
return false // TODO: Potentially make this a param, for now, no known usecase to include
|
||||||
|
}
|
||||||
|
return config.HasUsecases(usecases)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -201,6 +201,26 @@ func (bcl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
|
|||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (bcl *BackendConfigLoader) GetBackendConfigsByFilter(filter BackendConfigFilterFn) []BackendConfig {
|
||||||
|
bcl.Lock()
|
||||||
|
defer bcl.Unlock()
|
||||||
|
var res []BackendConfig
|
||||||
|
|
||||||
|
if filter == nil {
|
||||||
|
filter = NoFilterFn
|
||||||
|
}
|
||||||
|
|
||||||
|
for n, v := range bcl.configs {
|
||||||
|
if filter(n, &v) {
|
||||||
|
res = append(res, v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: I don't think this one needs to Sort on name... but we'll see what breaks.
|
||||||
|
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
|
||||||
func (bcl *BackendConfigLoader) RemoveBackendConfig(m string) {
|
func (bcl *BackendConfigLoader) RemoveBackendConfig(m string) {
|
||||||
bcl.Lock()
|
bcl.Lock()
|
||||||
defer bcl.Unlock()
|
defer bcl.Unlock()
|
||||||
|
|||||||
@@ -19,12 +19,17 @@ var _ = Describe("Test cases for config related functions", func() {
|
|||||||
`backend: "../foo-bar"
|
`backend: "../foo-bar"
|
||||||
name: "foo"
|
name: "foo"
|
||||||
parameters:
|
parameters:
|
||||||
model: "foo-bar"`)
|
model: "foo-bar"
|
||||||
|
known_usecases:
|
||||||
|
- chat
|
||||||
|
- COMPLETION
|
||||||
|
`)
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
config, err := readBackendConfigFromFile(tmp.Name())
|
config, err := readBackendConfigFromFile(tmp.Name())
|
||||||
Expect(err).To(BeNil())
|
Expect(err).To(BeNil())
|
||||||
Expect(config).ToNot(BeNil())
|
Expect(config).ToNot(BeNil())
|
||||||
Expect(config.Validate()).To(BeFalse())
|
Expect(config.Validate()).To(BeFalse())
|
||||||
|
Expect(config.KnownUsecases).ToNot(BeNil())
|
||||||
})
|
})
|
||||||
It("Test Validate", func() {
|
It("Test Validate", func() {
|
||||||
tmp, err := os.CreateTemp("", "config.yaml")
|
tmp, err := os.CreateTemp("", "config.yaml")
|
||||||
@@ -61,4 +66,99 @@ parameters:
|
|||||||
Expect(config.Validate()).To(BeTrue())
|
Expect(config.Validate()).To(BeTrue())
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
It("Properly handles backend usecase matching", func() {

	// No backend, no templates: only FLAG_ANY can match.
	bare := BackendConfig{
		Name: "a",
	}
	Expect(bare.HasUsecases(FLAG_ANY)).To(BeTrue()) // FLAG_ANY just means the config _exists_ essentially.

	// Image backend recognised by name.
	imageGen := BackendConfig{
		Name:    "b",
		Backend: "stablediffusion",
	}
	Expect(imageGen.HasUsecases(FLAG_ANY)).To(BeTrue())
	Expect(imageGen.HasUsecases(FLAG_IMAGE)).To(BeTrue())
	Expect(imageGen.HasUsecases(FLAG_CHAT)).To(BeFalse())

	// Chat template only.
	chatOnly := BackendConfig{
		Name:    "c",
		Backend: "llama-cpp",
		TemplateConfig: TemplateConfig{
			Chat: "chat",
		},
	}
	Expect(chatOnly.HasUsecases(FLAG_ANY)).To(BeTrue())
	Expect(chatOnly.HasUsecases(FLAG_IMAGE)).To(BeFalse())
	Expect(chatOnly.HasUsecases(FLAG_COMPLETION)).To(BeFalse())
	Expect(chatOnly.HasUsecases(FLAG_CHAT)).To(BeTrue())

	// Chat and completion templates.
	chatAndCompletion := BackendConfig{
		Name:    "d",
		Backend: "llama-cpp",
		TemplateConfig: TemplateConfig{
			Chat:       "chat",
			Completion: "completion",
		},
	}
	Expect(chatAndCompletion.HasUsecases(FLAG_ANY)).To(BeTrue())
	Expect(chatAndCompletion.HasUsecases(FLAG_IMAGE)).To(BeFalse())
	Expect(chatAndCompletion.HasUsecases(FLAG_COMPLETION)).To(BeTrue())
	Expect(chatAndCompletion.HasUsecases(FLAG_CHAT)).To(BeTrue())

	// Completion template with embeddings switched on.
	trueValue := true
	completionWithEmbeddings := BackendConfig{
		Name:    "e",
		Backend: "llama-cpp",
		TemplateConfig: TemplateConfig{
			Completion: "completion",
		},
		Embeddings: &trueValue,
	}

	Expect(completionWithEmbeddings.HasUsecases(FLAG_ANY)).To(BeTrue())
	Expect(completionWithEmbeddings.HasUsecases(FLAG_IMAGE)).To(BeFalse())
	Expect(completionWithEmbeddings.HasUsecases(FLAG_COMPLETION)).To(BeTrue())
	Expect(completionWithEmbeddings.HasUsecases(FLAG_CHAT)).To(BeFalse())
	Expect(completionWithEmbeddings.HasUsecases(FLAG_EMBEDDINGS)).To(BeTrue())

	// TTS backend recognised by name.
	piperTTS := BackendConfig{
		Name:    "f",
		Backend: "piper",
	}
	Expect(piperTTS.HasUsecases(FLAG_ANY)).To(BeTrue())
	Expect(piperTTS.HasUsecases(FLAG_TTS)).To(BeTrue())
	Expect(piperTTS.HasUsecases(FLAG_CHAT)).To(BeFalse())

	// Transcription backend recognised by name.
	whisperTranscriber := BackendConfig{
		Name:    "g",
		Backend: "whisper",
	}
	Expect(whisperTranscriber.HasUsecases(FLAG_ANY)).To(BeTrue())
	Expect(whisperTranscriber.HasUsecases(FLAG_TRANSCRIPT)).To(BeTrue())
	Expect(whisperTranscriber.HasUsecases(FLAG_TTS)).To(BeFalse())

	// Backend that counts for both TTS and sound generation.
	musicgen := BackendConfig{
		Name:    "h",
		Backend: "transformers-musicgen",
	}
	Expect(musicgen.HasUsecases(FLAG_ANY)).To(BeTrue())
	Expect(musicgen.HasUsecases(FLAG_TRANSCRIPT)).To(BeFalse())
	Expect(musicgen.HasUsecases(FLAG_TTS)).To(BeTrue())
	Expect(musicgen.HasUsecases(FLAG_SOUND_GENERATION)).To(BeTrue())

	// Explicit KnownUsecases are honoured in addition to what is guessed
	// from the backend name.
	knownUsecases := FLAG_CHAT | FLAG_COMPLETION
	withDeclaredUsecases := BackendConfig{
		Name:    "i",
		Backend: "whisper",
		// Earlier test checks parsing, this just needs to set final values
		KnownUsecases: &knownUsecases,
	}
	Expect(withDeclaredUsecases.HasUsecases(FLAG_ANY)).To(BeTrue())
	Expect(withDeclaredUsecases.HasUsecases(FLAG_TRANSCRIPT)).To(BeTrue())
	Expect(withDeclaredUsecases.HasUsecases(FLAG_TTS)).To(BeFalse())
	Expect(withDeclaredUsecases.HasUsecases(FLAG_COMPLETION)).To(BeTrue())
	Expect(withDeclaredUsecases.HasUsecases(FLAG_CHAT)).To(BeTrue())

})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -132,7 +132,7 @@ func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*Gal
|
|||||||
func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) {
|
func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) {
|
||||||
var refFile string
|
var refFile string
|
||||||
uri := downloader.URI(url)
|
uri := downloader.URI(url)
|
||||||
err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
|
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
|
||||||
refFile = string(d)
|
refFile = string(d)
|
||||||
if len(refFile) == 0 {
|
if len(refFile) == 0 {
|
||||||
return fmt.Errorf("invalid reference file at url %s: %s", url, d)
|
return fmt.Errorf("invalid reference file at url %s: %s", url, d)
|
||||||
@@ -156,7 +156,7 @@ func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel,
|
|||||||
}
|
}
|
||||||
uri := downloader.URI(gallery.URL)
|
uri := downloader.URI(gallery.URL)
|
||||||
|
|
||||||
err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
|
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
|
||||||
return yaml.Unmarshal(d, &models)
|
return yaml.Unmarshal(d, &models)
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ type PromptTemplate struct {
|
|||||||
func GetGalleryConfigFromURL(url string, basePath string) (Config, error) {
|
func GetGalleryConfigFromURL(url string, basePath string) (Config, error) {
|
||||||
var config Config
|
var config Config
|
||||||
uri := downloader.URI(url)
|
uri := downloader.URI(url)
|
||||||
err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
|
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
|
||||||
return yaml.Unmarshal(d, &config)
|
return yaml.Unmarshal(d, &config)
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -3,13 +3,15 @@ package http
|
|||||||
import (
|
import (
|
||||||
"embed"
|
"embed"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
|
||||||
|
|
||||||
|
"github.com/dave-gray101/v2keyauth"
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
"github.com/mudler/LocalAI/pkg/utils"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||||
"github.com/mudler/LocalAI/core/http/endpoints/openai"
|
"github.com/mudler/LocalAI/core/http/endpoints/openai"
|
||||||
|
"github.com/mudler/LocalAI/core/http/middleware"
|
||||||
"github.com/mudler/LocalAI/core/http/routes"
|
"github.com/mudler/LocalAI/core/http/routes"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
@@ -29,24 +31,6 @@ import (
|
|||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
func readAuthHeader(c *fiber.Ctx) string {
|
|
||||||
authHeader := c.Get("Authorization")
|
|
||||||
|
|
||||||
// elevenlabs
|
|
||||||
xApiKey := c.Get("xi-api-key")
|
|
||||||
if xApiKey != "" {
|
|
||||||
authHeader = "Bearer " + xApiKey
|
|
||||||
}
|
|
||||||
|
|
||||||
// anthropic
|
|
||||||
xApiKey = c.Get("x-api-key")
|
|
||||||
if xApiKey != "" {
|
|
||||||
authHeader = "Bearer " + xApiKey
|
|
||||||
}
|
|
||||||
|
|
||||||
return authHeader
|
|
||||||
}
|
|
||||||
|
|
||||||
// Embed a directory
|
// Embed a directory
|
||||||
//
|
//
|
||||||
//go:embed static/*
|
//go:embed static/*
|
||||||
@@ -137,37 +121,17 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
|
// Health Checks should always be exempt from auth, so register these first
|
||||||
auth := func(c *fiber.Ctx) error {
|
routes.HealthRoutes(app)
|
||||||
if len(appConfig.ApiKeys) == 0 {
|
|
||||||
return c.Next()
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(appConfig.ApiKeys) == 0 {
|
kaConfig, err := middleware.GetKeyAuthConfig(appConfig)
|
||||||
return c.Next()
|
if err != nil || kaConfig == nil {
|
||||||
}
|
return nil, fmt.Errorf("failed to create key auth config: %w", err)
|
||||||
|
|
||||||
authHeader := readAuthHeader(c)
|
|
||||||
if authHeader == "" {
|
|
||||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
|
|
||||||
}
|
|
||||||
|
|
||||||
// If it's a bearer token
|
|
||||||
authHeaderParts := strings.Split(authHeader, " ")
|
|
||||||
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
|
|
||||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
|
|
||||||
}
|
|
||||||
|
|
||||||
apiKey := authHeaderParts[1]
|
|
||||||
for _, key := range appConfig.ApiKeys {
|
|
||||||
if apiKey == key {
|
|
||||||
return c.Next()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration
|
||||||
|
app.Use(v2keyauth.New(*kaConfig))
|
||||||
|
|
||||||
if appConfig.CORS {
|
if appConfig.CORS {
|
||||||
var c func(ctx *fiber.Ctx) error
|
var c func(ctx *fiber.Ctx) error
|
||||||
if appConfig.CORSAllowOrigins == "" {
|
if appConfig.CORSAllowOrigins == "" {
|
||||||
@@ -192,13 +156,13 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
|
|||||||
galleryService := services.NewGalleryService(appConfig)
|
galleryService := services.NewGalleryService(appConfig)
|
||||||
galleryService.Start(appConfig.Context, cl)
|
galleryService.Start(appConfig.Context, cl)
|
||||||
|
|
||||||
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth)
|
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig)
|
||||||
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth)
|
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService)
|
||||||
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth)
|
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig)
|
||||||
if !appConfig.DisableWebUI {
|
if !appConfig.DisableWebUI {
|
||||||
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth)
|
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService)
|
||||||
}
|
}
|
||||||
routes.RegisterJINARoutes(app, cl, ml, appConfig, auth)
|
routes.RegisterJINARoutes(app, cl, ml, appConfig)
|
||||||
|
|
||||||
httpFS := http.FS(embedDirStatic)
|
httpFS := http.FS(embedDirStatic)
|
||||||
|
|
||||||
|
|||||||
@@ -31,6 +31,9 @@ import (
|
|||||||
"github.com/sashabaranov/go-openai/jsonschema"
|
"github.com/sashabaranov/go-openai/jsonschema"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const apiKey = "joshua"
|
||||||
|
const bearerKey = "Bearer " + apiKey
|
||||||
|
|
||||||
const testPrompt = `### System:
|
const testPrompt = `### System:
|
||||||
You are an AI assistant that follows instruction extremely well. Help as much as you can.
|
You are an AI assistant that follows instruction extremely well. Help as much as you can.
|
||||||
|
|
||||||
@@ -50,11 +53,19 @@ type modelApplyRequest struct {
|
|||||||
|
|
||||||
func getModelStatus(url string) (response map[string]interface{}) {
|
func getModelStatus(url string) (response map[string]interface{}) {
|
||||||
// Create the HTTP request
|
// Create the HTTP request
|
||||||
resp, err := http.Get(url)
|
req, err := http.NewRequest("GET", url, nil)
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("Authorization", bearerKey)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println("Error creating request:", err)
|
fmt.Println("Error creating request:", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
client := &http.Client{}
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Error sending request:", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
body, err := io.ReadAll(resp.Body)
|
body, err := io.ReadAll(resp.Body)
|
||||||
@@ -72,14 +83,15 @@ func getModelStatus(url string) (response map[string]interface{}) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func getModels(url string) (response []gallery.GalleryModel) {
|
func getModels(url string) ([]gallery.GalleryModel, error) {
|
||||||
|
response := []gallery.GalleryModel{}
|
||||||
uri := downloader.URI(url)
|
uri := downloader.URI(url)
|
||||||
// TODO: No tests currently seem to exercise file:// urls. Fix?
|
// TODO: No tests currently seem to exercise file:// urls. Fix?
|
||||||
uri.DownloadAndUnmarshal("", func(url string, i []byte) error {
|
err := uri.DownloadWithAuthorizationAndCallback("", bearerKey, func(url string, i []byte) error {
|
||||||
// Unmarshal YAML data into a struct
|
// Unmarshal YAML data into a struct
|
||||||
return json.Unmarshal(i, &response)
|
return json.Unmarshal(i, &response)
|
||||||
})
|
})
|
||||||
return
|
return response, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) {
|
func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) {
|
||||||
@@ -101,6 +113,7 @@ func postModelApplyRequest(url string, request modelApplyRequest) (response map[
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
req.Header.Set("Content-Type", "application/json")
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("Authorization", bearerKey)
|
||||||
|
|
||||||
// Make the request
|
// Make the request
|
||||||
client := &http.Client{}
|
client := &http.Client{}
|
||||||
@@ -140,6 +153,7 @@ func postRequestJSON[B any](url string, bodyJson *B) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
req.Header.Set("Content-Type", "application/json")
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("Authorization", bearerKey)
|
||||||
|
|
||||||
client := &http.Client{}
|
client := &http.Client{}
|
||||||
resp, err := client.Do(req)
|
resp, err := client.Do(req)
|
||||||
@@ -175,6 +189,7 @@ func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson *
|
|||||||
}
|
}
|
||||||
|
|
||||||
req.Header.Set("Content-Type", "application/json")
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("Authorization", bearerKey)
|
||||||
|
|
||||||
client := &http.Client{}
|
client := &http.Client{}
|
||||||
resp, err := client.Do(req)
|
resp, err := client.Do(req)
|
||||||
@@ -195,6 +210,35 @@ func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson *
|
|||||||
return json.Unmarshal(body, respJson)
|
return json.Unmarshal(body, respJson)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// postInvalidRequest POSTs the literal body "invalid request" to url with a
// JSON content type and returns the outcome.
//
// No Authorization header is set on the request. The returned int is the HTTP
// status code, or -1 when the request could not be built, sent, or its body
// read. The returned error is non-nil for those failures and for any status
// outside the 200-399 range, in which case it includes the response body.
func postInvalidRequest(url string) (error, int) {

	payload := bytes.NewBufferString("invalid request")
	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return err, -1
	}
	req.Header.Set("Content-Type", "application/json")

	httpClient := &http.Client{}
	resp, err := httpClient.Do(req)
	if err != nil {
		return err, -1
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return err, -1
	}

	status := resp.StatusCode
	if status >= 200 && status < 400 {
		return nil, status
	}
	return fmt.Errorf("unexpected status code: %d, body: %s", status, string(body)), status
}
|
||||||
|
|
||||||
//go:embed backend-assets/*
|
//go:embed backend-assets/*
|
||||||
var backendAssets embed.FS
|
var backendAssets embed.FS
|
||||||
|
|
||||||
@@ -260,6 +304,7 @@ var _ = Describe("API test", func() {
|
|||||||
config.WithContext(c),
|
config.WithContext(c),
|
||||||
config.WithGalleries(galleries),
|
config.WithGalleries(galleries),
|
||||||
config.WithModelPath(modelDir),
|
config.WithModelPath(modelDir),
|
||||||
|
config.WithApiKeys([]string{apiKey}),
|
||||||
config.WithBackendAssets(backendAssets),
|
config.WithBackendAssets(backendAssets),
|
||||||
config.WithBackendAssetsOutput(backendAssetsDir))...)
|
config.WithBackendAssetsOutput(backendAssetsDir))...)
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
@@ -269,7 +314,7 @@ var _ = Describe("API test", func() {
|
|||||||
|
|
||||||
go app.Listen("127.0.0.1:9090")
|
go app.Listen("127.0.0.1:9090")
|
||||||
|
|
||||||
defaultConfig := openai.DefaultConfig("")
|
defaultConfig := openai.DefaultConfig(apiKey)
|
||||||
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
|
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
|
||||||
|
|
||||||
client2 = openaigo.NewClient("")
|
client2 = openaigo.NewClient("")
|
||||||
@@ -295,10 +340,19 @@ var _ = Describe("API test", func() {
|
|||||||
Expect(err).To(HaveOccurred())
|
Expect(err).To(HaveOccurred())
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Context("Auth Tests", func() {
|
||||||
|
It("Should fail if the api key is missing", func() {
|
||||||
|
err, sc := postInvalidRequest("http://127.0.0.1:9090/models/available")
|
||||||
|
Expect(err).ToNot(BeNil())
|
||||||
|
Expect(sc).To(Equal(403))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
Context("Applying models", func() {
|
Context("Applying models", func() {
|
||||||
|
|
||||||
It("applies models from a gallery", func() {
|
It("applies models from a gallery", func() {
|
||||||
models := getModels("http://127.0.0.1:9090/models/available")
|
models, err := getModels("http://127.0.0.1:9090/models/available")
|
||||||
|
Expect(err).To(BeNil())
|
||||||
Expect(len(models)).To(Equal(2), fmt.Sprint(models))
|
Expect(len(models)).To(Equal(2), fmt.Sprint(models))
|
||||||
Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models))
|
Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models))
|
||||||
Expect(models[1].Installed).To(BeFalse(), fmt.Sprint(models))
|
Expect(models[1].Installed).To(BeFalse(), fmt.Sprint(models))
|
||||||
@@ -331,7 +385,8 @@ var _ = Describe("API test", func() {
|
|||||||
Expect(content["backend"]).To(Equal("bert-embeddings"))
|
Expect(content["backend"]).To(Equal("bert-embeddings"))
|
||||||
Expect(content["foo"]).To(Equal("bar"))
|
Expect(content["foo"]).To(Equal("bar"))
|
||||||
|
|
||||||
models = getModels("http://127.0.0.1:9090/models/available")
|
models, err = getModels("http://127.0.0.1:9090/models/available")
|
||||||
|
Expect(err).To(BeNil())
|
||||||
Expect(len(models)).To(Equal(2), fmt.Sprint(models))
|
Expect(len(models)).To(Equal(2), fmt.Sprint(models))
|
||||||
Expect(models[0].Name).To(Or(Equal("bert"), Equal("bert2")))
|
Expect(models[0].Name).To(Or(Equal("bert"), Equal("bert2")))
|
||||||
Expect(models[1].Name).To(Or(Equal("bert"), Equal("bert2")))
|
Expect(models[1].Name).To(Or(Equal("bert"), Equal("bert2")))
|
||||||
|
|||||||
@@ -19,14 +19,16 @@ func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *mo
|
|||||||
if ctx.Params("model") != "" {
|
if ctx.Params("model") != "" {
|
||||||
modelInput = ctx.Params("model")
|
modelInput = ctx.Params("model")
|
||||||
}
|
}
|
||||||
|
if ctx.Query("model") != "" {
|
||||||
|
modelInput = ctx.Query("model")
|
||||||
|
}
|
||||||
// Set model from bearer token, if available
|
// Set model from bearer token, if available
|
||||||
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ")
|
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // Reduced duplicate characters of Bearer
|
||||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||||
|
|
||||||
// If no model was specified, take the first available
|
// If no model was specified, take the first available
|
||||||
if modelInput == "" && !bearerExists && firstModel {
|
if modelInput == "" && !bearerExists && firstModel {
|
||||||
models, _ := services.ListModels(cl, loader, "", true)
|
models, _ := services.ListModels(cl, loader, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||||
if len(models) > 0 {
|
if len(models) > 0 {
|
||||||
modelInput = models[0]
|
modelInput = models[0]
|
||||||
log.Debug().Msgf("No model specified, using: %s", modelInput)
|
log.Debug().Msgf("No model specified, using: %s", modelInput)
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Support uploading files?
|
// TODO: Support uploading files?
|
||||||
filePath, _, err := backend.SoundGeneration(cfg.Backend, modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
|
filePath, _, err := backend.SoundGeneration(modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -45,13 +45,13 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
|||||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||||
config.LoadOptionF16(appConfig.F16),
|
config.LoadOptionF16(appConfig.F16),
|
||||||
)
|
)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
modelFile = input.Model
|
modelFile = input.Model
|
||||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
} else {
|
} else {
|
||||||
modelFile = cfg.Model
|
modelFile = cfg.Model
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||||
|
|
||||||
if input.Backend != "" {
|
if input.Backend != "" {
|
||||||
@@ -64,7 +64,7 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
|||||||
Documents: req.Documents,
|
Documents: req.Documents,
|
||||||
}
|
}
|
||||||
|
|
||||||
results, err := backend.Rerank(cfg.Backend, modelFile, request, ml, appConfig, *cfg)
|
results, err := backend.Rerank(modelFile, request, ml, appConfig, *cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
60
core/http/endpoints/localai/get_token_metrics.go
Normal file
60
core/http/endpoints/localai/get_token_metrics.go
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
package localai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID
|
||||||
|
//
|
||||||
|
// @Summary Get TokenMetrics for Active Slot.
|
||||||
|
// @Accept json
|
||||||
|
// @Produce audio/x-wav
|
||||||
|
// @Success 200 {string} binary "generated audio/wav file"
|
||||||
|
// @Router /v1/tokenMetrics [get]
|
||||||
|
// @Router /tokenMetrics [get]
|
||||||
|
func TokenMetricsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
|
return func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
|
input := new(schema.TokenMetricsRequest)
|
||||||
|
|
||||||
|
// Get input data from the request body
|
||||||
|
if err := c.BodyParser(input); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||||
|
if err != nil {
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||||
|
config.LoadOptionDebug(appConfig.Debug),
|
||||||
|
config.LoadOptionThreads(appConfig.Threads),
|
||||||
|
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||||
|
config.LoadOptionF16(appConfig.F16),
|
||||||
|
)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Err(err)
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
} else {
|
||||||
|
modelFile = cfg.Model
|
||||||
|
}
|
||||||
|
log.Debug().Msgf("Token Metrics for model: %s", modelFile)
|
||||||
|
|
||||||
|
response, err := backend.TokenMetrics(modelFile, ml, appConfig, *cfg)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return c.JSON(response)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -17,12 +17,14 @@ func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConf
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
loadedModels := ml.ListModels()
|
||||||
for b := range appConfig.ExternalGRPCBackends {
|
for b := range appConfig.ExternalGRPCBackends {
|
||||||
availableBackends = append(availableBackends, b)
|
availableBackends = append(availableBackends, b)
|
||||||
}
|
}
|
||||||
return c.JSON(
|
return c.JSON(
|
||||||
schema.SystemInformationResponse{
|
schema.SystemInformationResponse{
|
||||||
Backends: availableBackends,
|
Backends: availableBackends,
|
||||||
|
Models: loadedModels,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
58
core/http/endpoints/localai/tokenize.go
Normal file
58
core/http/endpoints/localai/tokenize.go
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
package localai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TokenizeEndpoint exposes a REST API to tokenize the content
|
||||||
|
// @Summary Tokenize the input.
|
||||||
|
// @Success 200 {object} schema.TokenizeResponse "Response"
|
||||||
|
// @Router /v1/tokenize [post]
|
||||||
|
func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
|
return func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
|
input := new(schema.TokenizeRequest)
|
||||||
|
|
||||||
|
// Get input data from the request body
|
||||||
|
if err := c.BodyParser(input); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||||
|
if err != nil {
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||||
|
config.LoadOptionDebug(appConfig.Debug),
|
||||||
|
config.LoadOptionThreads(appConfig.Threads),
|
||||||
|
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||||
|
config.LoadOptionF16(appConfig.F16),
|
||||||
|
)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Err(err)
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
} else {
|
||||||
|
modelFile = cfg.Model
|
||||||
|
}
|
||||||
|
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||||
|
|
||||||
|
tokenResponse, err := backend.ModelTokenize(input.Content, ml, *cfg, appConfig)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
c.JSON(tokenResponse)
|
||||||
|
return nil
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -13,7 +13,7 @@ import (
|
|||||||
func WelcomeEndpoint(appConfig *config.ApplicationConfig,
|
func WelcomeEndpoint(appConfig *config.ApplicationConfig,
|
||||||
cl *config.BackendConfigLoader, ml *model.ModelLoader, modelStatus func() (map[string]string, map[string]string)) func(*fiber.Ctx) error {
|
cl *config.BackendConfigLoader, ml *model.ModelLoader, modelStatus func() (map[string]string, map[string]string)) func(*fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
models, _ := services.ListModels(cl, ml, "", true)
|
models, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||||
backendConfigs := cl.GetAllBackendConfigs()
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
galleryConfigs := map[string]*gallery.Config{}
|
galleryConfigs := map[string]*gallery.Config{}
|
||||||
@@ -32,18 +32,10 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
|
|||||||
// Get model statuses to display in the UI the operation in progress
|
// Get model statuses to display in the UI the operation in progress
|
||||||
processingModels, taskTypes := modelStatus()
|
processingModels, taskTypes := modelStatus()
|
||||||
|
|
||||||
modelsWithoutConfig := []string{}
|
|
||||||
|
|
||||||
for _, m := range models {
|
|
||||||
if _, ok := modelsWithBackendConfig[m]; !ok {
|
|
||||||
modelsWithoutConfig = append(modelsWithoutConfig, m)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
"Title": "LocalAI API - " + internal.PrintableVersion(),
|
"Title": "LocalAI API - " + internal.PrintableVersion(),
|
||||||
"Version": internal.PrintableVersion(),
|
"Version": internal.PrintableVersion(),
|
||||||
"Models": modelsWithoutConfig,
|
"Models": models,
|
||||||
"ModelsConfig": backendConfigs,
|
"ModelsConfig": backendConfigs,
|
||||||
"GalleryConfig": galleryConfigs,
|
"GalleryConfig": galleryConfigs,
|
||||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||||
|
|||||||
@@ -225,7 +225,7 @@ func filterAssistantsAfterID(assistants []Assistant, id string) []Assistant {
|
|||||||
|
|
||||||
func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelName string) (found bool) {
|
func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelName string) (found bool) {
|
||||||
found = false
|
found = false
|
||||||
models, err := services.ListModels(cl, ml, "", true)
|
models, err := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -161,6 +161,12 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
|||||||
textContentToReturn = ""
|
textContentToReturn = ""
|
||||||
id = uuid.New().String()
|
id = uuid.New().String()
|
||||||
created = int(time.Now().Unix())
|
created = int(time.Now().Unix())
|
||||||
|
// Set CorrelationID
|
||||||
|
correlationID := c.Get("X-Correlation-ID")
|
||||||
|
if len(strings.TrimSpace(correlationID)) == 0 {
|
||||||
|
correlationID = id
|
||||||
|
}
|
||||||
|
c.Set("X-Correlation-ID", correlationID)
|
||||||
|
|
||||||
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -444,6 +450,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
|||||||
c.Set("Cache-Control", "no-cache")
|
c.Set("Cache-Control", "no-cache")
|
||||||
c.Set("Connection", "keep-alive")
|
c.Set("Connection", "keep-alive")
|
||||||
c.Set("Transfer-Encoding", "chunked")
|
c.Set("Transfer-Encoding", "chunked")
|
||||||
|
c.Set("X-Correlation-ID", id)
|
||||||
|
|
||||||
responses := make(chan schema.OpenAIResponse)
|
responses := make(chan schema.OpenAIResponse)
|
||||||
|
|
||||||
@@ -640,8 +647,16 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
|
|||||||
for _, m := range input.Messages {
|
for _, m := range input.Messages {
|
||||||
images = append(images, m.StringImages...)
|
images = append(images, m.StringImages...)
|
||||||
}
|
}
|
||||||
|
videos := []string{}
|
||||||
|
for _, m := range input.Messages {
|
||||||
|
videos = append(videos, m.StringVideos...)
|
||||||
|
}
|
||||||
|
audios := []string{}
|
||||||
|
for _, m := range input.Messages {
|
||||||
|
audios = append(audios, m.StringAudios...)
|
||||||
|
}
|
||||||
|
|
||||||
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil)
|
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, *config, o, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Err(err).Msg("model inference failed")
|
log.Error().Err(err).Msg("model inference failed")
|
||||||
return "", err
|
return "", err
|
||||||
|
|||||||
@@ -57,6 +57,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
|||||||
}
|
}
|
||||||
|
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
|
// Add Correlation
|
||||||
|
c.Set("X-Correlation-ID", id)
|
||||||
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
|
|||||||
@@ -27,9 +27,17 @@ func ComputeChoices(
|
|||||||
for _, m := range req.Messages {
|
for _, m := range req.Messages {
|
||||||
images = append(images, m.StringImages...)
|
images = append(images, m.StringImages...)
|
||||||
}
|
}
|
||||||
|
videos := []string{}
|
||||||
|
for _, m := range req.Messages {
|
||||||
|
videos = append(videos, m.StringVideos...)
|
||||||
|
}
|
||||||
|
audios := []string{}
|
||||||
|
for _, m := range req.Messages {
|
||||||
|
audios = append(audios, m.StringAudios...)
|
||||||
|
}
|
||||||
|
|
||||||
// get the model function to call for the result
|
// get the model function to call for the result
|
||||||
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback)
|
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, *config, o, tokenCallback)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return result, backend.TokenUsage{}, err
|
return result, backend.TokenUsage{}, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,32 +18,32 @@ func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader)
|
|||||||
filter := c.Query("filter")
|
filter := c.Query("filter")
|
||||||
|
|
||||||
// By default, exclude any loose files that are already referenced by a configuration file.
|
// By default, exclude any loose files that are already referenced by a configuration file.
|
||||||
excludeConfigured := c.QueryBool("excludeConfigured", true)
|
var policy services.LooseFilePolicy
|
||||||
|
if c.QueryBool("excludeConfigured", true) {
|
||||||
|
policy = services.SKIP_IF_CONFIGURED
|
||||||
|
} else {
|
||||||
|
policy = services.ALWAYS_INCLUDE // This replicates current behavior. TODO: give more options to the user?
|
||||||
|
}
|
||||||
|
|
||||||
dataModels, err := modelList(bcl, ml, filter, excludeConfigured)
|
filterFn, err := config.BuildNameFilterFn(filter)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
modelNames, err := services.ListModels(bcl, ml, filterFn, policy)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Map from a slice of names to a slice of OpenAIModel response objects
|
||||||
|
dataModels := []schema.OpenAIModel{}
|
||||||
|
for _, m := range modelNames {
|
||||||
|
dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
|
||||||
|
}
|
||||||
|
|
||||||
return c.JSON(schema.ModelsDataResponse{
|
return c.JSON(schema.ModelsDataResponse{
|
||||||
Object: "list",
|
Object: "list",
|
||||||
Data: dataModels,
|
Data: dataModels,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func modelList(bcl *config.BackendConfigLoader, ml *model.ModelLoader, filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) {
|
|
||||||
|
|
||||||
models, err := services.ListModels(bcl, ml, filter, excludeConfigured)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
dataModels := []schema.OpenAIModel{}
|
|
||||||
|
|
||||||
// Then iterate through the loose files:
|
|
||||||
for _, m := range models {
|
|
||||||
dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
|
|
||||||
}
|
|
||||||
|
|
||||||
return dataModels, nil
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -6,15 +6,22 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/google/uuid"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/pkg/functions"
|
"github.com/mudler/LocalAI/pkg/functions"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
|
"github.com/mudler/LocalAI/pkg/templates"
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
"github.com/mudler/LocalAI/pkg/utils"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type correlationIDKeyType string
|
||||||
|
|
||||||
|
// CorrelationIDKey to track request across process boundary
|
||||||
|
const CorrelationIDKey correlationIDKeyType = "correlationID"
|
||||||
|
|
||||||
func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
|
func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
|
||||||
input := new(schema.OpenAIRequest)
|
input := new(schema.OpenAIRequest)
|
||||||
|
|
||||||
@@ -24,9 +31,14 @@ func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLo
|
|||||||
}
|
}
|
||||||
|
|
||||||
received, _ := json.Marshal(input)
|
received, _ := json.Marshal(input)
|
||||||
|
// Extract or generate the correlation ID
|
||||||
|
correlationID := c.Get("X-Correlation-ID", uuid.New().String())
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(o.Context)
|
ctx, cancel := context.WithCancel(o.Context)
|
||||||
input.Context = ctx
|
// Add the correlation ID to the new context
|
||||||
|
ctxWithCorrelationID := context.WithValue(ctx, CorrelationIDKey, correlationID)
|
||||||
|
|
||||||
|
input.Context = ctxWithCorrelationID
|
||||||
input.Cancel = cancel
|
input.Cancel = cancel
|
||||||
|
|
||||||
log.Debug().Msgf("Request received: %s", string(received))
|
log.Debug().Msgf("Request received: %s", string(received))
|
||||||
@@ -135,7 +147,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Decode each request's message content
|
// Decode each request's message content
|
||||||
index := 0
|
imgIndex, vidIndex, audioIndex := 0, 0, 0
|
||||||
for i, m := range input.Messages {
|
for i, m := range input.Messages {
|
||||||
switch content := m.Content.(type) {
|
switch content := m.Content.(type) {
|
||||||
case string:
|
case string:
|
||||||
@@ -144,20 +156,58 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
|
|||||||
dat, _ := json.Marshal(content)
|
dat, _ := json.Marshal(content)
|
||||||
c := []schema.Content{}
|
c := []schema.Content{}
|
||||||
json.Unmarshal(dat, &c)
|
json.Unmarshal(dat, &c)
|
||||||
|
CONTENT:
|
||||||
for _, pp := range c {
|
for _, pp := range c {
|
||||||
if pp.Type == "text" {
|
switch pp.Type {
|
||||||
|
case "text":
|
||||||
input.Messages[i].StringContent = pp.Text
|
input.Messages[i].StringContent = pp.Text
|
||||||
} else if pp.Type == "image_url" {
|
case "video", "video_url":
|
||||||
// Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
|
// Decode content as base64 either if it's an URL or base64 text
|
||||||
base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL)
|
base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL)
|
||||||
if err == nil {
|
if err != nil {
|
||||||
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
log.Error().Msgf("Failed encoding video: %s", err)
|
||||||
// set a placeholder for each image
|
continue CONTENT
|
||||||
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
|
|
||||||
index++
|
|
||||||
} else {
|
|
||||||
log.Error().Msgf("Failed encoding image: %s", err)
|
|
||||||
}
|
}
|
||||||
|
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
|
||||||
|
|
||||||
|
t := "[vid-{{.ID}}]{{.Text}}"
|
||||||
|
if config.TemplateConfig.Video != "" {
|
||||||
|
t = config.TemplateConfig.Video
|
||||||
|
}
|
||||||
|
// set a placeholder for each image
|
||||||
|
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, vidIndex, input.Messages[i].StringContent)
|
||||||
|
vidIndex++
|
||||||
|
case "audio_url", "audio":
|
||||||
|
// Decode content as base64 either if it's an URL or base64 text
|
||||||
|
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Msgf("Failed encoding image: %s", err)
|
||||||
|
continue CONTENT
|
||||||
|
}
|
||||||
|
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
|
||||||
|
// set a placeholder for each image
|
||||||
|
t := "[audio-{{.ID}}]{{.Text}}"
|
||||||
|
if config.TemplateConfig.Audio != "" {
|
||||||
|
t = config.TemplateConfig.Audio
|
||||||
|
}
|
||||||
|
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, audioIndex, input.Messages[i].StringContent)
|
||||||
|
audioIndex++
|
||||||
|
case "image_url", "image":
|
||||||
|
// Decode content as base64 either if it's an URL or base64 text
|
||||||
|
base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Msgf("Failed encoding image: %s", err)
|
||||||
|
continue CONTENT
|
||||||
|
}
|
||||||
|
|
||||||
|
t := "[img-{{.ID}}]{{.Text}}"
|
||||||
|
if config.TemplateConfig.Image != "" {
|
||||||
|
t = config.TemplateConfig.Image
|
||||||
|
}
|
||||||
|
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
||||||
|
// set a placeholder for each image
|
||||||
|
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, imgIndex, input.Messages[i].StringContent)
|
||||||
|
imgIndex++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
94
core/http/middleware/auth.go
Normal file
94
core/http/middleware/auth.go
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
package middleware
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/subtle"
|
||||||
|
"errors"
|
||||||
|
|
||||||
|
"github.com/dave-gray101/v2keyauth"
|
||||||
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/gofiber/fiber/v2/middleware/keyauth"
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
|
||||||
|
// Currently this requires an upstream patch - and feature patches are no longer accepted to v2
|
||||||
|
// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate.
|
||||||
|
|
||||||
|
func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) {
|
||||||
|
customLookup, err := v2keyauth.MultipleKeySourceLookup([]string{"header:Authorization", "header:x-api-key", "header:xi-api-key"}, keyauth.ConfigDefault.AuthScheme)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &v2keyauth.Config{
|
||||||
|
CustomKeyLookup: customLookup,
|
||||||
|
Next: getApiKeyRequiredFilterFunction(applicationConfig),
|
||||||
|
Validator: getApiKeyValidationFunction(applicationConfig),
|
||||||
|
ErrorHandler: getApiKeyErrorHandler(applicationConfig),
|
||||||
|
AuthScheme: "Bearer",
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler {
|
||||||
|
return func(ctx *fiber.Ctx, err error) error {
|
||||||
|
if errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) {
|
||||||
|
if len(applicationConfig.ApiKeys) == 0 {
|
||||||
|
return ctx.Next() // if no keys are set up, any error we get here is not an error.
|
||||||
|
}
|
||||||
|
if applicationConfig.OpaqueErrors {
|
||||||
|
return ctx.SendStatus(403)
|
||||||
|
}
|
||||||
|
return ctx.Status(403).SendString(err.Error())
|
||||||
|
}
|
||||||
|
if applicationConfig.OpaqueErrors {
|
||||||
|
return ctx.SendStatus(500)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) {
|
||||||
|
|
||||||
|
if applicationConfig.UseSubtleKeyComparison {
|
||||||
|
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
|
||||||
|
if len(applicationConfig.ApiKeys) == 0 {
|
||||||
|
return true, nil // If no keys are setup, accept everything
|
||||||
|
}
|
||||||
|
for _, validKey := range applicationConfig.ApiKeys {
|
||||||
|
if subtle.ConstantTimeCompare([]byte(apiKey), []byte(validKey)) == 1 {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, v2keyauth.ErrMissingOrMalformedAPIKey
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
|
||||||
|
if len(applicationConfig.ApiKeys) == 0 {
|
||||||
|
return true, nil // If no keys are setup, accept everything
|
||||||
|
}
|
||||||
|
for _, validKey := range applicationConfig.ApiKeys {
|
||||||
|
if apiKey == validKey {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, v2keyauth.ErrMissingOrMalformedAPIKey
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool {
|
||||||
|
if applicationConfig.DisableApiKeyRequirementForHttpGet {
|
||||||
|
return func(c *fiber.Ctx) bool {
|
||||||
|
if c.Method() != "GET" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for _, rx := range applicationConfig.HttpGetExemptedEndpoints {
|
||||||
|
if rx.MatchString(c.Path()) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return func(c *fiber.Ctx) bool { return false }
|
||||||
|
}
|
||||||
@@ -10,12 +10,11 @@ import (
|
|||||||
func RegisterElevenLabsRoutes(app *fiber.App,
|
func RegisterElevenLabsRoutes(app *fiber.App,
|
||||||
cl *config.BackendConfigLoader,
|
cl *config.BackendConfigLoader,
|
||||||
ml *model.ModelLoader,
|
ml *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig) {
|
||||||
auth func(*fiber.Ctx) error) {
|
|
||||||
|
|
||||||
// Elevenlabs
|
// Elevenlabs
|
||||||
app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
app.Post("/v1/text-to-speech/:voice-id", elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
app.Post("/v1/sound-generation", auth, elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
|
app.Post("/v1/sound-generation", elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
13
core/http/routes/health.go
Normal file
13
core/http/routes/health.go
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
package routes
|
||||||
|
|
||||||
|
import "github.com/gofiber/fiber/v2"
|
||||||
|
|
||||||
|
func HealthRoutes(app *fiber.App) {
|
||||||
|
// Service health checks
|
||||||
|
ok := func(c *fiber.Ctx) error {
|
||||||
|
return c.SendStatus(200)
|
||||||
|
}
|
||||||
|
|
||||||
|
app.Get("/healthz", ok)
|
||||||
|
app.Get("/readyz", ok)
|
||||||
|
}
|
||||||
@@ -11,8 +11,7 @@ import (
|
|||||||
func RegisterJINARoutes(app *fiber.App,
|
func RegisterJINARoutes(app *fiber.App,
|
||||||
cl *config.BackendConfigLoader,
|
cl *config.BackendConfigLoader,
|
||||||
ml *model.ModelLoader,
|
ml *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig) {
|
||||||
auth func(*fiber.Ctx) error) {
|
|
||||||
|
|
||||||
// POST endpoint to mimic the reranking
|
// POST endpoint to mimic the reranking
|
||||||
app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig))
|
app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig))
|
||||||
|
|||||||
@@ -15,61 +15,55 @@ func RegisterLocalAIRoutes(app *fiber.App,
|
|||||||
cl *config.BackendConfigLoader,
|
cl *config.BackendConfigLoader,
|
||||||
ml *model.ModelLoader,
|
ml *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig,
|
||||||
galleryService *services.GalleryService,
|
galleryService *services.GalleryService) {
|
||||||
auth func(*fiber.Ctx) error) {
|
|
||||||
|
|
||||||
app.Get("/swagger/*", swagger.HandlerDefault) // default
|
app.Get("/swagger/*", swagger.HandlerDefault) // default
|
||||||
|
|
||||||
// LocalAI API endpoints
|
// LocalAI API endpoints
|
||||||
if !appConfig.DisableGalleryEndpoint {
|
if !appConfig.DisableGalleryEndpoint {
|
||||||
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
|
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
|
||||||
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
|
app.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
|
||||||
app.Post("/models/delete/:name", auth, modelGalleryEndpointService.DeleteModelGalleryEndpoint())
|
app.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())
|
||||||
|
|
||||||
app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
|
app.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
|
||||||
app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
|
app.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
|
||||||
app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
|
app.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint())
|
||||||
app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
|
app.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint())
|
||||||
app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
|
app.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
|
||||||
app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
|
app.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
|
||||||
}
|
}
|
||||||
|
|
||||||
app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
|
app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// Stores
|
// Stores
|
||||||
sl := model.NewModelLoader("")
|
sl := model.NewModelLoader("")
|
||||||
app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
|
app.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
|
||||||
app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
|
app.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
|
||||||
app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
|
app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
|
||||||
app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
|
app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
|
||||||
|
|
||||||
// Kubernetes health checks
|
app.Get("/metrics", localai.LocalAIMetricsEndpoint())
|
||||||
ok := func(c *fiber.Ctx) error {
|
|
||||||
return c.SendStatus(200)
|
|
||||||
}
|
|
||||||
|
|
||||||
app.Get("/healthz", ok)
|
|
||||||
app.Get("/readyz", ok)
|
|
||||||
|
|
||||||
app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
|
|
||||||
|
|
||||||
// Experimental Backend Statistics Module
|
// Experimental Backend Statistics Module
|
||||||
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
|
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
|
||||||
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitorService))
|
app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
|
||||||
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitorService))
|
app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
|
||||||
|
|
||||||
// p2p
|
// p2p
|
||||||
if p2p.IsP2PEnabled() {
|
if p2p.IsP2PEnabled() {
|
||||||
app.Get("/api/p2p", auth, localai.ShowP2PNodes(appConfig))
|
app.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
|
||||||
app.Get("/api/p2p/token", auth, localai.ShowP2PToken(appConfig))
|
app.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
|
||||||
}
|
}
|
||||||
|
|
||||||
app.Get("/version", auth, func(c *fiber.Ctx) error {
|
app.Get("/version", func(c *fiber.Ctx) error {
|
||||||
return c.JSON(struct {
|
return c.JSON(struct {
|
||||||
Version string `json:"version"`
|
Version string `json:"version"`
|
||||||
}{Version: internal.PrintableVersion()})
|
}{Version: internal.PrintableVersion()})
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/system", auth, localai.SystemInformations(ml, appConfig))
|
app.Get("/system", localai.SystemInformations(ml, appConfig))
|
||||||
|
|
||||||
|
// misc
|
||||||
|
app.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,66 +11,65 @@ import (
|
|||||||
func RegisterOpenAIRoutes(app *fiber.App,
|
func RegisterOpenAIRoutes(app *fiber.App,
|
||||||
cl *config.BackendConfigLoader,
|
cl *config.BackendConfigLoader,
|
||||||
ml *model.ModelLoader,
|
ml *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig) {
|
||||||
auth func(*fiber.Ctx) error) {
|
|
||||||
// openAI compatible API endpoint
|
// openAI compatible API endpoint
|
||||||
|
|
||||||
// chat
|
// chat
|
||||||
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
|
app.Post("/v1/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
|
app.Post("/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// edit
|
// edit
|
||||||
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
|
app.Post("/v1/edits", openai.EditEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
|
app.Post("/edits", openai.EditEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// assistant
|
// assistant
|
||||||
app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
|
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
|
app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
||||||
app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
||||||
app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// files
|
// files
|
||||||
app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
|
app.Post("/v1/files", openai.UploadFilesEndpoint(cl, appConfig))
|
||||||
app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
|
app.Post("/files", openai.UploadFilesEndpoint(cl, appConfig))
|
||||||
app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
|
app.Get("/v1/files", openai.ListFilesEndpoint(cl, appConfig))
|
||||||
app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
|
app.Get("/files", openai.ListFilesEndpoint(cl, appConfig))
|
||||||
app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
|
app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
|
||||||
app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
|
app.Get("/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
|
||||||
app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
|
app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
|
||||||
app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
|
app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
|
||||||
app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
|
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
|
||||||
app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
|
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
|
||||||
|
|
||||||
// completion
|
// completion
|
||||||
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
app.Post("/v1/completions", openai.CompletionEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
app.Post("/completions", openai.CompletionEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
app.Post("/v1/engines/:model/completions", openai.CompletionEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// embeddings
|
// embeddings
|
||||||
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
app.Post("/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// audio
|
// audio
|
||||||
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
|
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))
|
app.Post("/v1/audio/speech", localai.TTSEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// images
|
// images
|
||||||
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
|
app.Post("/v1/images/generations", openai.ImageEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
if appConfig.ImageDir != "" {
|
if appConfig.ImageDir != "" {
|
||||||
app.Static("/generated-images", appConfig.ImageDir)
|
app.Static("/generated-images", appConfig.ImageDir)
|
||||||
@@ -81,6 +80,6 @@ func RegisterOpenAIRoutes(app *fiber.App,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// List models
|
// List models
|
||||||
app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
|
app.Get("/v1/models", openai.ListModelsEndpoint(cl, ml))
|
||||||
app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
|
app.Get("/models", openai.ListModelsEndpoint(cl, ml))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -59,8 +59,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
cl *config.BackendConfigLoader,
|
cl *config.BackendConfigLoader,
|
||||||
ml *model.ModelLoader,
|
ml *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig,
|
||||||
galleryService *services.GalleryService,
|
galleryService *services.GalleryService) {
|
||||||
auth func(*fiber.Ctx) error) {
|
|
||||||
|
|
||||||
// keeps the state of models that are being installed from the UI
|
// keeps the state of models that are being installed from the UI
|
||||||
var processingModels = NewModelOpCache()
|
var processingModels = NewModelOpCache()
|
||||||
@@ -85,10 +84,10 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return processingModelsData, taskTypes
|
return processingModelsData, taskTypes
|
||||||
}
|
}
|
||||||
|
|
||||||
app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml, modelStatus))
|
app.Get("/", localai.WelcomeEndpoint(appConfig, cl, ml, modelStatus))
|
||||||
|
|
||||||
if p2p.IsP2PEnabled() {
|
if p2p.IsP2PEnabled() {
|
||||||
app.Get("/p2p", auth, func(c *fiber.Ctx) error {
|
app.Get("/p2p", func(c *fiber.Ctx) error {
|
||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
"Title": "LocalAI - P2P dashboard",
|
"Title": "LocalAI - P2P dashboard",
|
||||||
"Version": internal.PrintableVersion(),
|
"Version": internal.PrintableVersion(),
|
||||||
@@ -104,17 +103,17 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
})
|
})
|
||||||
|
|
||||||
/* show nodes live! */
|
/* show nodes live! */
|
||||||
app.Get("/p2p/ui/workers", auth, func(c *fiber.Ctx) error {
|
app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error {
|
||||||
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
|
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
|
||||||
})
|
})
|
||||||
app.Get("/p2p/ui/workers-federation", auth, func(c *fiber.Ctx) error {
|
app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error {
|
||||||
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
|
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/p2p/ui/workers-stats", auth, func(c *fiber.Ctx) error {
|
app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error {
|
||||||
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
|
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
|
||||||
})
|
})
|
||||||
app.Get("/p2p/ui/workers-federation-stats", auth, func(c *fiber.Ctx) error {
|
app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error {
|
||||||
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
|
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -122,7 +121,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
if !appConfig.DisableGalleryEndpoint {
|
if !appConfig.DisableGalleryEndpoint {
|
||||||
|
|
||||||
// Show the Models page (all models)
|
// Show the Models page (all models)
|
||||||
app.Get("/browse", auth, func(c *fiber.Ctx) error {
|
app.Get("/browse", func(c *fiber.Ctx) error {
|
||||||
term := c.Query("term")
|
term := c.Query("term")
|
||||||
|
|
||||||
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
|
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
|
||||||
@@ -167,7 +166,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
|
|
||||||
// Show the models, filtered from the user input
|
// Show the models, filtered from the user input
|
||||||
// https://htmx.org/examples/active-search/
|
// https://htmx.org/examples/active-search/
|
||||||
app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error {
|
app.Post("/browse/search/models", func(c *fiber.Ctx) error {
|
||||||
form := struct {
|
form := struct {
|
||||||
Search string `form:"search"`
|
Search string `form:"search"`
|
||||||
}{}
|
}{}
|
||||||
@@ -188,7 +187,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
|
|
||||||
// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
|
// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
|
||||||
// https://htmx.org/examples/progress-bar/
|
// https://htmx.org/examples/progress-bar/
|
||||||
app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error {
|
app.Post("/browse/install/model/:id", func(c *fiber.Ctx) error {
|
||||||
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!
|
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!
|
||||||
log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID)
|
log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID)
|
||||||
|
|
||||||
@@ -215,7 +214,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
|
|
||||||
// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
|
// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
|
||||||
// https://htmx.org/examples/progress-bar/
|
// https://htmx.org/examples/progress-bar/
|
||||||
app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error {
|
app.Post("/browse/delete/model/:id", func(c *fiber.Ctx) error {
|
||||||
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!
|
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!
|
||||||
log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID)
|
log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID)
|
||||||
var galleryName = galleryID
|
var galleryName = galleryID
|
||||||
@@ -255,7 +254,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
// Display the job current progress status
|
// Display the job current progress status
|
||||||
// If the job is done, we trigger the /browse/job/:uid route
|
// If the job is done, we trigger the /browse/job/:uid route
|
||||||
// https://htmx.org/examples/progress-bar/
|
// https://htmx.org/examples/progress-bar/
|
||||||
app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error {
|
app.Get("/browse/job/progress/:uid", func(c *fiber.Ctx) error {
|
||||||
jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests!
|
jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests!
|
||||||
|
|
||||||
status := galleryService.GetStatus(jobUID)
|
status := galleryService.GetStatus(jobUID)
|
||||||
@@ -279,7 +278,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
|
|
||||||
// this route is hit when the job is done, and we display the
|
// this route is hit when the job is done, and we display the
|
||||||
// final state (for now just displays "Installation completed")
|
// final state (for now just displays "Installation completed")
|
||||||
app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error {
|
app.Get("/browse/job/:uid", func(c *fiber.Ctx) error {
|
||||||
jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests!
|
jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests!
|
||||||
|
|
||||||
status := galleryService.GetStatus(jobUID)
|
status := galleryService.GetStatus(jobUID)
|
||||||
@@ -303,8 +302,8 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Show the Chat page
|
// Show the Chat page
|
||||||
app.Get("/chat/:model", auth, func(c *fiber.Ctx) error {
|
app.Get("/chat/:model", func(c *fiber.Ctx) error {
|
||||||
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||||
|
|
||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
"Title": "LocalAI - Chat with " + c.Params("model"),
|
"Title": "LocalAI - Chat with " + c.Params("model"),
|
||||||
@@ -318,8 +317,8 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return c.Render("views/chat", summary)
|
return c.Render("views/chat", summary)
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/talk/", auth, func(c *fiber.Ctx) error {
|
app.Get("/talk/", func(c *fiber.Ctx) error {
|
||||||
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||||
|
|
||||||
if len(backendConfigs) == 0 {
|
if len(backendConfigs) == 0 {
|
||||||
// If no model is available redirect to the index which suggests how to install models
|
// If no model is available redirect to the index which suggests how to install models
|
||||||
@@ -338,9 +337,9 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return c.Render("views/talk", summary)
|
return c.Render("views/talk", summary)
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/chat/", auth, func(c *fiber.Ctx) error {
|
app.Get("/chat/", func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||||
|
|
||||||
if len(backendConfigs) == 0 {
|
if len(backendConfigs) == 0 {
|
||||||
// If no model is available redirect to the index which suggests how to install models
|
// If no model is available redirect to the index which suggests how to install models
|
||||||
@@ -359,7 +358,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return c.Render("views/chat", summary)
|
return c.Render("views/chat", summary)
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/text2image/:model", auth, func(c *fiber.Ctx) error {
|
app.Get("/text2image/:model", func(c *fiber.Ctx) error {
|
||||||
backendConfigs := cl.GetAllBackendConfigs()
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
@@ -374,7 +373,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return c.Render("views/text2image", summary)
|
return c.Render("views/text2image", summary)
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/text2image/", auth, func(c *fiber.Ctx) error {
|
app.Get("/text2image/", func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
backendConfigs := cl.GetAllBackendConfigs()
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
@@ -395,7 +394,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return c.Render("views/text2image", summary)
|
return c.Render("views/text2image", summary)
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/tts/:model", auth, func(c *fiber.Ctx) error {
|
app.Get("/tts/:model", func(c *fiber.Ctx) error {
|
||||||
backendConfigs := cl.GetAllBackendConfigs()
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
@@ -410,7 +409,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return c.Render("views/tts", summary)
|
return c.Render("views/tts", summary)
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/tts/", auth, func(c *fiber.Ctx) error {
|
app.Get("/tts/", func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
backendConfigs := cl.GetAllBackendConfigs()
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"net"
|
"net"
|
||||||
|
|
||||||
"github.com/mudler/edgevpn/pkg/node"
|
"github.com/mudler/edgevpn/pkg/node"
|
||||||
@@ -41,7 +42,7 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
|
|||||||
log.Error().Err(err).Msg("Error listening")
|
log.Error().Err(err).Msg("Error listening")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// ll.Info("Binding local port on", srcaddr)
|
|
||||||
go func() {
|
go func() {
|
||||||
<-ctx.Done()
|
<-ctx.Done()
|
||||||
l.Close()
|
l.Close()
|
||||||
@@ -82,6 +83,7 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
|
|||||||
|
|
||||||
if workerID == "" {
|
if workerID == "" {
|
||||||
log.Error().Msg("No available nodes yet")
|
log.Error().Msg("No available nodes yet")
|
||||||
|
fs.sendHTMLResponse(conn, 503, "Sorry, waiting for nodes to connect")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -89,6 +91,7 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
|
|||||||
nodeData, exists := GetNode(fs.service, workerID)
|
nodeData, exists := GetNode(fs.service, workerID)
|
||||||
if !exists {
|
if !exists {
|
||||||
log.Error().Msgf("Node %s not found", workerID)
|
log.Error().Msgf("Node %s not found", workerID)
|
||||||
|
fs.sendHTMLResponse(conn, 404, "Node not found")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -100,3 +103,42 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sendHTMLResponse sends a basic HTML response with a status code and a message.
|
||||||
|
// This is extracted to make the HTML content maintainable.
|
||||||
|
func (fs *FederatedServer) sendHTMLResponse(conn net.Conn, statusCode int, message string) {
|
||||||
|
defer conn.Close()
|
||||||
|
|
||||||
|
// Define the HTML content separately for easier maintenance.
|
||||||
|
htmlContent := fmt.Sprintf("<html><body><h1>%s</h1></body></html>\r\n", message)
|
||||||
|
|
||||||
|
// Create the HTTP response with dynamic status code and content.
|
||||||
|
response := fmt.Sprintf(
|
||||||
|
"HTTP/1.1 %d %s\r\n"+
|
||||||
|
"Content-Type: text/html\r\n"+
|
||||||
|
"Connection: close\r\n"+
|
||||||
|
"\r\n"+
|
||||||
|
"%s",
|
||||||
|
statusCode, getHTTPStatusText(statusCode), htmlContent,
|
||||||
|
)
|
||||||
|
|
||||||
|
// Write the response to the client connection.
|
||||||
|
_, writeErr := io.WriteString(conn, response)
|
||||||
|
if writeErr != nil {
|
||||||
|
log.Error().Err(writeErr).Msg("Error writing response to client")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// getHTTPStatusText returns a textual representation of HTTP status codes.
|
||||||
|
func getHTTPStatusText(statusCode int) string {
|
||||||
|
switch statusCode {
|
||||||
|
case 503:
|
||||||
|
return "Service Unavailable"
|
||||||
|
case 404:
|
||||||
|
return "Not Found"
|
||||||
|
case 200:
|
||||||
|
return "OK"
|
||||||
|
default:
|
||||||
|
return "Unknown Status"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package schema
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/mudler/LocalAI/core/p2p"
|
"github.com/mudler/LocalAI/core/p2p"
|
||||||
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
gopsutil "github.com/shirou/gopsutil/v3/process"
|
gopsutil "github.com/shirou/gopsutil/v3/process"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -9,6 +10,10 @@ type BackendMonitorRequest struct {
|
|||||||
Model string `json:"model" yaml:"model"`
|
Model string `json:"model" yaml:"model"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type TokenMetricsRequest struct {
|
||||||
|
Model string `json:"model" yaml:"model"`
|
||||||
|
}
|
||||||
|
|
||||||
type BackendMonitorResponse struct {
|
type BackendMonitorResponse struct {
|
||||||
MemoryInfo *gopsutil.MemoryInfoStat
|
MemoryInfo *gopsutil.MemoryInfoStat
|
||||||
MemoryPercent float32
|
MemoryPercent float32
|
||||||
@@ -72,5 +77,6 @@ type P2PNodesResponse struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type SystemInformationResponse struct {
|
type SystemInformationResponse struct {
|
||||||
Backends []string `json:"backends"`
|
Backends []string `json:"backends"`
|
||||||
|
Models []model.Model `json:"loaded_models"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -58,6 +58,8 @@ type Content struct {
|
|||||||
Type string `json:"type" yaml:"type"`
|
Type string `json:"type" yaml:"type"`
|
||||||
Text string `json:"text" yaml:"text"`
|
Text string `json:"text" yaml:"text"`
|
||||||
ImageURL ContentURL `json:"image_url" yaml:"image_url"`
|
ImageURL ContentURL `json:"image_url" yaml:"image_url"`
|
||||||
|
AudioURL ContentURL `json:"audio_url" yaml:"audio_url"`
|
||||||
|
VideoURL ContentURL `json:"video_url" yaml:"video_url"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ContentURL struct {
|
type ContentURL struct {
|
||||||
@@ -76,6 +78,8 @@ type Message struct {
|
|||||||
|
|
||||||
StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
|
StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
|
||||||
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
|
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
|
||||||
|
StringVideos []string `json:"string_videos,omitempty" yaml:"string_videos,omitempty"`
|
||||||
|
StringAudios []string `json:"string_audios,omitempty" yaml:"string_audios,omitempty"`
|
||||||
|
|
||||||
// A result of a function call
|
// A result of a function call
|
||||||
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
|
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
|
||||||
|
|||||||
10
core/schema/tokenize.go
Normal file
10
core/schema/tokenize.go
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
package schema
|
||||||
|
|
||||||
|
type TokenizeRequest struct {
|
||||||
|
Content string `json:"content"`
|
||||||
|
Model string `json:"model"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type TokenizeResponse struct {
|
||||||
|
Tokens []int32 `json:"tokens"`
|
||||||
|
}
|
||||||
@@ -1,55 +1,47 @@
|
|||||||
package services
|
package services
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"regexp"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
func ListModels(bcl *config.BackendConfigLoader, ml *model.ModelLoader, filter string, excludeConfigured bool) ([]string, error) {
|
type LooseFilePolicy int
|
||||||
|
|
||||||
models, err := ml.ListFilesInModelPath()
|
const (
|
||||||
if err != nil {
|
SKIP_IF_CONFIGURED LooseFilePolicy = iota
|
||||||
return nil, err
|
SKIP_ALWAYS
|
||||||
}
|
ALWAYS_INCLUDE
|
||||||
|
LOOSE_ONLY
|
||||||
|
)
|
||||||
|
|
||||||
var mm map[string]interface{} = map[string]interface{}{}
|
func ListModels(bcl *config.BackendConfigLoader, ml *model.ModelLoader, filter config.BackendConfigFilterFn, looseFilePolicy LooseFilePolicy) ([]string, error) {
|
||||||
|
|
||||||
|
var skipMap map[string]interface{} = map[string]interface{}{}
|
||||||
|
|
||||||
dataModels := []string{}
|
dataModels := []string{}
|
||||||
|
|
||||||
var filterFn func(name string) bool
|
// Start with known configurations
|
||||||
|
if looseFilePolicy != LOOSE_ONLY {
|
||||||
// If filter is not specified, do not filter the list by model name
|
for _, c := range bcl.GetBackendConfigsByFilter(filter) {
|
||||||
if filter == "" {
|
if looseFilePolicy == SKIP_IF_CONFIGURED {
|
||||||
filterFn = func(_ string) bool { return true }
|
skipMap[c.Model] = nil
|
||||||
} else {
|
}
|
||||||
// If filter _IS_ specified, we compile it to a regex which is used to create the filterFn
|
|
||||||
rxp, err := regexp.Compile(filter)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
filterFn = func(name string) bool {
|
|
||||||
return rxp.MatchString(name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start with the known configurations
|
|
||||||
for _, c := range bcl.GetAllBackendConfigs() {
|
|
||||||
if excludeConfigured {
|
|
||||||
mm[c.Model] = nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if filterFn(c.Name) {
|
|
||||||
dataModels = append(dataModels, c.Name)
|
dataModels = append(dataModels, c.Name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Then iterate through the loose files:
|
// Then iterate through the loose files if requested.
|
||||||
for _, m := range models {
|
if looseFilePolicy != SKIP_ALWAYS {
|
||||||
// And only adds them if they shouldn't be skipped.
|
|
||||||
if _, exists := mm[m]; !exists && filterFn(m) {
|
models, err := ml.ListFilesInModelPath()
|
||||||
dataModels = append(dataModels, m)
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
for _, m := range models {
|
||||||
|
// And only adds them if they shouldn't be skipped.
|
||||||
|
if _, exists := skipMap[m]; !exists && filter(m, nil) {
|
||||||
|
dataModels = append(dataModels, m)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,206 +1,237 @@
|
|||||||
package startup
|
package startup
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core"
|
"github.com/mudler/LocalAI/core"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/services"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/internal"
|
"github.com/mudler/LocalAI/core/services"
|
||||||
"github.com/mudler/LocalAI/pkg/assets"
|
"github.com/mudler/LocalAI/internal"
|
||||||
"github.com/mudler/LocalAI/pkg/library"
|
"github.com/mudler/LocalAI/pkg/assets"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/library"
|
||||||
pkgStartup "github.com/mudler/LocalAI/pkg/startup"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
"github.com/mudler/LocalAI/pkg/xsysinfo"
|
pkgStartup "github.com/mudler/LocalAI/pkg/startup"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/mudler/LocalAI/pkg/xsysinfo"
|
||||||
)
|
"github.com/rs/zerolog/log"
|
||||||
|
)
|
||||||
func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
|
|
||||||
options := config.NewApplicationConfig(opts...)
|
func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
|
||||||
|
options := config.NewApplicationConfig(opts...)
|
||||||
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath)
|
|
||||||
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
|
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath)
|
||||||
caps, err := xsysinfo.CPUCapabilities()
|
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
|
||||||
if err == nil {
|
caps, err := xsysinfo.CPUCapabilities()
|
||||||
log.Debug().Msgf("CPU capabilities: %v", caps)
|
if err == nil {
|
||||||
}
|
log.Debug().Msgf("CPU capabilities: %v", caps)
|
||||||
gpus, err := xsysinfo.GPUs()
|
}
|
||||||
if err == nil {
|
gpus, err := xsysinfo.GPUs()
|
||||||
log.Debug().Msgf("GPU count: %d", len(gpus))
|
if err == nil {
|
||||||
for _, gpu := range gpus {
|
log.Debug().Msgf("GPU count: %d", len(gpus))
|
||||||
log.Debug().Msgf("GPU: %s", gpu.String())
|
for _, gpu := range gpus {
|
||||||
}
|
log.Debug().Msgf("GPU: %s", gpu.String())
|
||||||
}
|
}
|
||||||
|
}
|
||||||
// Make sure directories exists
|
|
||||||
if options.ModelPath == "" {
|
// Make sure directories exists
|
||||||
return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty")
|
if options.ModelPath == "" {
|
||||||
}
|
return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty")
|
||||||
err = os.MkdirAll(options.ModelPath, 0750)
|
}
|
||||||
if err != nil {
|
err = os.MkdirAll(options.ModelPath, 0750)
|
||||||
return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err)
|
if err != nil {
|
||||||
}
|
return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err)
|
||||||
if options.ImageDir != "" {
|
}
|
||||||
err := os.MkdirAll(options.ImageDir, 0750)
|
if options.ImageDir != "" {
|
||||||
if err != nil {
|
err := os.MkdirAll(options.ImageDir, 0750)
|
||||||
return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err)
|
if err != nil {
|
||||||
}
|
return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err)
|
||||||
}
|
}
|
||||||
if options.AudioDir != "" {
|
}
|
||||||
err := os.MkdirAll(options.AudioDir, 0750)
|
if options.AudioDir != "" {
|
||||||
if err != nil {
|
err := os.MkdirAll(options.AudioDir, 0750)
|
||||||
return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err)
|
if err != nil {
|
||||||
}
|
return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err)
|
||||||
}
|
}
|
||||||
if options.UploadDir != "" {
|
}
|
||||||
err := os.MkdirAll(options.UploadDir, 0750)
|
if options.UploadDir != "" {
|
||||||
if err != nil {
|
err := os.MkdirAll(options.UploadDir, 0750)
|
||||||
return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err)
|
if err != nil {
|
||||||
}
|
return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
|
|
||||||
log.Error().Err(err).Msg("error installing models")
|
if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
|
||||||
}
|
log.Error().Err(err).Msg("error installing models")
|
||||||
|
}
|
||||||
cl := config.NewBackendConfigLoader(options.ModelPath)
|
|
||||||
ml := model.NewModelLoader(options.ModelPath)
|
cl := config.NewBackendConfigLoader(options.ModelPath)
|
||||||
|
ml := model.NewModelLoader(options.ModelPath)
|
||||||
configLoaderOpts := options.ToConfigLoaderOptions()
|
|
||||||
|
configLoaderOpts := options.ToConfigLoaderOptions()
|
||||||
if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
|
|
||||||
log.Error().Err(err).Msg("error loading config files")
|
if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
|
||||||
}
|
log.Error().Err(err).Msg("error loading config files")
|
||||||
|
}
|
||||||
if options.ConfigFile != "" {
|
|
||||||
if err := cl.LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil {
|
if options.ConfigFile != "" {
|
||||||
log.Error().Err(err).Msg("error loading config file")
|
if err := cl.LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil {
|
||||||
}
|
log.Error().Err(err).Msg("error loading config file")
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if err := cl.Preload(options.ModelPath); err != nil {
|
|
||||||
log.Error().Err(err).Msg("error downloading models")
|
if err := cl.Preload(options.ModelPath); err != nil {
|
||||||
}
|
log.Error().Err(err).Msg("error downloading models")
|
||||||
|
}
|
||||||
if options.PreloadJSONModels != "" {
|
|
||||||
if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil {
|
if options.PreloadJSONModels != "" {
|
||||||
return nil, nil, nil, err
|
if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil {
|
||||||
}
|
return nil, nil, nil, err
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if options.PreloadModelsFromPath != "" {
|
|
||||||
if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil {
|
if options.PreloadModelsFromPath != "" {
|
||||||
return nil, nil, nil, err
|
if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil {
|
||||||
}
|
return nil, nil, nil, err
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if options.Debug {
|
|
||||||
for _, v := range cl.GetAllBackendConfigs() {
|
if options.Debug {
|
||||||
log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v)
|
for _, v := range cl.GetAllBackendConfigs() {
|
||||||
}
|
log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if options.AssetsDestination != "" {
|
|
||||||
// Extract files from the embedded FS
|
if options.AssetsDestination != "" {
|
||||||
err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
|
// Extract files from the embedded FS
|
||||||
log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
|
err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
|
||||||
if err != nil {
|
log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
|
||||||
log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
|
if err != nil {
|
||||||
}
|
log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if options.LibPath != "" {
|
|
||||||
// If there is a lib directory, set LD_LIBRARY_PATH to include it
|
if options.LibPath != "" {
|
||||||
err := library.LoadExternal(options.LibPath)
|
// If there is a lib directory, set LD_LIBRARY_PATH to include it
|
||||||
if err != nil {
|
err := library.LoadExternal(options.LibPath)
|
||||||
log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries")
|
if err != nil {
|
||||||
}
|
log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries")
|
||||||
}
|
}
|
||||||
|
}
|
||||||
// turn off any process that was started by GRPC if the context is canceled
|
|
||||||
go func() {
|
// turn off any process that was started by GRPC if the context is canceled
|
||||||
<-options.Context.Done()
|
go func() {
|
||||||
log.Debug().Msgf("Context canceled, shutting down")
|
<-options.Context.Done()
|
||||||
err := ml.StopAllGRPC()
|
log.Debug().Msgf("Context canceled, shutting down")
|
||||||
if err != nil {
|
err := ml.StopAllGRPC()
|
||||||
log.Error().Err(err).Msg("error while stopping all grpc backends")
|
if err != nil {
|
||||||
}
|
log.Error().Err(err).Msg("error while stopping all grpc backends")
|
||||||
}()
|
}
|
||||||
|
}()
|
||||||
if options.WatchDog {
|
|
||||||
wd := model.NewWatchDog(
|
if options.WatchDog {
|
||||||
ml,
|
wd := model.NewWatchDog(
|
||||||
options.WatchDogBusyTimeout,
|
ml,
|
||||||
options.WatchDogIdleTimeout,
|
options.WatchDogBusyTimeout,
|
||||||
options.WatchDogBusy,
|
options.WatchDogIdleTimeout,
|
||||||
options.WatchDogIdle)
|
options.WatchDogBusy,
|
||||||
ml.SetWatchDog(wd)
|
options.WatchDogIdle)
|
||||||
go wd.Run()
|
ml.SetWatchDog(wd)
|
||||||
go func() {
|
go wd.Run()
|
||||||
<-options.Context.Done()
|
go func() {
|
||||||
log.Debug().Msgf("Context canceled, shutting down")
|
<-options.Context.Done()
|
||||||
wd.Shutdown()
|
log.Debug().Msgf("Context canceled, shutting down")
|
||||||
}()
|
wd.Shutdown()
|
||||||
}
|
}()
|
||||||
|
}
|
||||||
// Watch the configuration directory
|
|
||||||
startWatcher(options)
|
if options.LoadToMemory != nil {
|
||||||
|
for _, m := range options.LoadToMemory {
|
||||||
log.Info().Msg("core/startup process completed!")
|
cfg, err := cl.LoadBackendConfigFileByName(m, options.ModelPath,
|
||||||
return cl, ml, options, nil
|
config.LoadOptionDebug(options.Debug),
|
||||||
}
|
config.LoadOptionThreads(options.Threads),
|
||||||
|
config.LoadOptionContextSize(options.ContextSize),
|
||||||
func startWatcher(options *config.ApplicationConfig) {
|
config.LoadOptionF16(options.F16),
|
||||||
if options.DynamicConfigsDir == "" {
|
config.ModelPath(options.ModelPath),
|
||||||
// No need to start the watcher if the directory is not set
|
)
|
||||||
return
|
if err != nil {
|
||||||
}
|
return nil, nil, nil, err
|
||||||
|
}
|
||||||
if _, err := os.Stat(options.DynamicConfigsDir); err != nil {
|
|
||||||
if os.IsNotExist(err) {
|
log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model)
|
||||||
// We try to create the directory if it does not exist and was specified
|
|
||||||
if err := os.MkdirAll(options.DynamicConfigsDir, 0700); err != nil {
|
o := backend.ModelOptions(*cfg, options, []model.Option{})
|
||||||
log.Error().Err(err).Msg("failed creating DynamicConfigsDir")
|
|
||||||
}
|
var backendErr error
|
||||||
} else {
|
if cfg.Backend != "" {
|
||||||
// something else happened, we log the error and don't start the watcher
|
o = append(o, model.WithBackendString(cfg.Backend))
|
||||||
log.Error().Err(err).Msg("failed to read DynamicConfigsDir, watcher will not be started")
|
_, backendErr = ml.BackendLoader(o...)
|
||||||
return
|
} else {
|
||||||
}
|
_, backendErr = ml.GreedyLoader(o...)
|
||||||
}
|
}
|
||||||
|
if backendErr != nil {
|
||||||
configHandler := newConfigFileHandler(options)
|
return nil, nil, nil, err
|
||||||
if err := configHandler.Watch(); err != nil {
|
}
|
||||||
log.Error().Err(err).Msg("failed creating watcher")
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
// Watch the configuration directory
|
||||||
// In Lieu of a proper DI framework, this function wires up the Application manually.
|
startWatcher(options)
|
||||||
// This is in core/startup rather than core/state.go to keep package references clean!
|
|
||||||
func createApplication(appConfig *config.ApplicationConfig) *core.Application {
|
log.Info().Msg("core/startup process completed!")
|
||||||
app := &core.Application{
|
return cl, ml, options, nil
|
||||||
ApplicationConfig: appConfig,
|
}
|
||||||
BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
|
|
||||||
ModelLoader: model.NewModelLoader(appConfig.ModelPath),
|
func startWatcher(options *config.ApplicationConfig) {
|
||||||
}
|
if options.DynamicConfigsDir == "" {
|
||||||
|
// No need to start the watcher if the directory is not set
|
||||||
var err error
|
return
|
||||||
|
}
|
||||||
// app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
|
|
||||||
// app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
|
if _, err := os.Stat(options.DynamicConfigsDir); err != nil {
|
||||||
// app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
|
if os.IsNotExist(err) {
|
||||||
// app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
|
// We try to create the directory if it does not exist and was specified
|
||||||
// app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
|
if err := os.MkdirAll(options.DynamicConfigsDir, 0700); err != nil {
|
||||||
|
log.Error().Err(err).Msg("failed creating DynamicConfigsDir")
|
||||||
app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
|
}
|
||||||
app.GalleryService = services.NewGalleryService(app.ApplicationConfig)
|
} else {
|
||||||
// app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService)
|
// something else happened, we log the error and don't start the watcher
|
||||||
|
log.Error().Err(err).Msg("failed to read DynamicConfigsDir, watcher will not be started")
|
||||||
app.LocalAIMetricsService, err = services.NewLocalAIMetricsService()
|
return
|
||||||
if err != nil {
|
}
|
||||||
log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.")
|
}
|
||||||
}
|
|
||||||
|
configHandler := newConfigFileHandler(options)
|
||||||
return app
|
if err := configHandler.Watch(); err != nil {
|
||||||
}
|
log.Error().Err(err).Msg("failed creating watcher")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// In Lieu of a proper DI framework, this function wires up the Application manually.
|
||||||
|
// This is in core/startup rather than core/state.go to keep package references clean!
|
||||||
|
func createApplication(appConfig *config.ApplicationConfig) *core.Application {
|
||||||
|
app := &core.Application{
|
||||||
|
ApplicationConfig: appConfig,
|
||||||
|
BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
|
||||||
|
ModelLoader: model.NewModelLoader(appConfig.ModelPath),
|
||||||
|
}
|
||||||
|
|
||||||
|
var err error
|
||||||
|
|
||||||
|
// app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
|
||||||
|
// app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
|
||||||
|
// app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
|
||||||
|
// app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
|
||||||
|
// app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
|
||||||
|
|
||||||
|
app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
|
||||||
|
app.GalleryService = services.NewGalleryService(app.ApplicationConfig)
|
||||||
|
// app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService)
|
||||||
|
|
||||||
|
app.LocalAIMetricsService, err = services.NewLocalAIMetricsService()
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.")
|
||||||
|
}
|
||||||
|
|
||||||
|
return app
|
||||||
|
}
|
||||||
|
|||||||
@@ -154,7 +154,7 @@ Images are available with and without python dependencies. Note that images with
|
|||||||
|
|
||||||
Images with `core` in the tag are smaller and do not contain any python dependencies.
|
Images with `core` in the tag are smaller and do not contain any python dependencies.
|
||||||
|
|
||||||
{{< tabs tabTotal="6" >}}
|
{{< tabs tabTotal="7" >}}
|
||||||
{{% tab tabName="Vanilla / CPU Images" %}}
|
{{% tab tabName="Vanilla / CPU Images" %}}
|
||||||
|
|
||||||
| Description | Quay | Docker Hub |
|
| Description | Quay | Docker Hub |
|
||||||
@@ -227,6 +227,15 @@ Images with `core` in the tag are smaller and do not contain any python dependen
|
|||||||
|
|
||||||
{{% /tab %}}
|
{{% /tab %}}
|
||||||
|
|
||||||
|
|
||||||
|
{{% tab tabName="Vulkan Images" %}}
|
||||||
|
| Description | Quay | Docker Hub |
|
||||||
|
| --- | --- |-------------------------------------------------------------|
|
||||||
|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-vulkan-ffmpeg-core` | `localai/localai:master-vulkan-ffmpeg-core` |
|
||||||
|
| Latest tag | `quay.io/go-skynet/local-ai:latest-vulkan-ffmpeg-core` | `localai/localai:latest-vulkan-ffmpeg-core` |
|
||||||
|
| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-vulkan-ffmpeg-core` | `localai/localai:{{< version >}}-vulkan-ffmpeg-core` |
|
||||||
|
{{% /tab %}}
|
||||||
|
|
||||||
{{< /tabs >}}
|
{{< /tabs >}}
|
||||||
|
|
||||||
## See Also
|
## See Also
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user