Mirror of https://github.com/mudler/LocalAI.git, synced 2026-02-04 03:32:40 -05:00

Compare commits (57 commits):
b58274b8a2, a31d00d904, 2cc1bd85af, 2c5a46bc34, f7f8b4804b, e5bd9a76c7, 4690b534e0, 6a7a7996bb, 962ebbaf77, f90d56d371, 445cfd4db3, b24d44dc56, cd31f8d865, 970cb3a219, f7aabf1b50, e38610e521, 3754f154ee, 29d7812344, 5fd46175dc, 52a268c38c, 53c3842bc2, c4f958e11b, 147440b39b, baff5ff8c2, ea13863221, 93ca56086e, 11c48a0004, b7ea9602f5, 982dc6a2bd, 74d903acca, 5fef3b0ff1, 0674893649, e8d44447ad, a24cd4fda0, 01860674c4, 987b7ad42d, 21974fe1d3, 26e1892521, a78cd67737, 5e243ceaeb, 1a0a6f60a7, 3179c019af, a8089494fd, a248ede222, 0f0ae13ad0, 773d5d23d5, c3982212f9, 7e6bf6e7a1, 9fc0135991, 164be58445, 1f8461767d, 935f4c23f6, 4c97406f2b, fb2a05ff43, 030d555995, 56d843c263, 2dc1fa2474
.env (4 changes)

```diff
@@ -10,7 +10,7 @@
 #
 ## Define galleries.
 ## models will to install will be visible in `/models/available`
-# LOCALAI_GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
+# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]
 
 ## CORS settings
 # LOCALAI_CORS=true
@@ -86,4 +86,4 @@
 # LOCALAI_WATCHDOG_BUSY=true
 #
 # Time in duration format (e.g. 1h30m) after which a backend is considered busy
-# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
+# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
```
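The `LOCALAI_GALLERIES` value is a JSON array of `name`/`url` pairs. A minimal sketch of decoding such a value, using an illustrative `Gallery` struct rather than LocalAI's actual gallery type:

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"os"
)

// Gallery mirrors the JSON shape used by LOCALAI_GALLERIES.
// This struct is an assumption for illustration; LocalAI's own
// gallery type may carry more fields.
type Gallery struct {
	Name string `json:"name"`
	URL  string `json:"url"`
}

func main() {
	// e.g. LOCALAI_GALLERIES=[{"name":"localai","url":"github:mudler/LocalAI/gallery/index.yaml@master"}]
	raw := os.Getenv("LOCALAI_GALLERIES")
	var galleries []Gallery
	if err := json.Unmarshal([]byte(raw), &galleries); err != nil {
		log.Fatalf("invalid LOCALAI_GALLERIES: %v", err)
	}
	for _, g := range galleries {
		fmt.Printf("gallery %q -> %s\n", g.Name, g.URL)
	}
}
```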
.github/bump_docs.sh (2 changes, vendored)

```diff
@@ -2,6 +2,6 @@
 set -xe
 REPO=$1
 
-LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name')
+LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name')
 
 cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json
```
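The GitHub `releases/latest` endpoint exposes both a free-form release title (`name`) and the underlying git tag (`tag_name`); only the latter is guaranteed to be a usable version string, which is what the script switches to. A minimal sketch of the distinction (the repository URL is just an example):

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// release holds the two fields of interest from the GitHub
// /releases/latest response: "name" is the free-form release title,
// "tag_name" is the git tag the release points at.
type release struct {
	Name    string `json:"name"`
	TagName string `json:"tag_name"`
}

func main() {
	resp, err := http.Get("https://api.github.com/repos/mudler/LocalAI/releases/latest")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var r release
	if err := json.NewDecoder(resp.Body).Decode(&r); err != nil {
		panic(err)
	}
	// The two can differ; tag_name is the stable choice for versioning.
	fmt.Printf("name=%q tag_name=%q\n", r.Name, r.TagName)
}
```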
.github/labeler.yml (7 changes, vendored)

```diff
@@ -8,6 +8,11 @@ kind/documentation:
   - changed-files:
       - any-glob-to-any-file: '*.md'
 
+area/ai-model:
+  - any:
+      - changed-files:
+          - any-glob-to-any-file: 'gallery/*'
+
 examples:
   - any:
       - changed-files:
@@ -16,4 +21,4 @@ examples:
 ci:
   - any:
       - changed-files:
-          - any-glob-to-any-file: '.github/*'
+          - any-glob-to-any-file: '.github/*'
```
.github/workflows/dependabot_auto.yml (2 changes, vendored)

```diff
@@ -14,7 +14,7 @@ jobs:
     steps:
       - name: Dependabot metadata
        id: metadata
-        uses: dependabot/fetch-metadata@v2.0.0
+        uses: dependabot/fetch-metadata@v2.1.0
        with:
          github-token: "${{ secrets.GITHUB_TOKEN }}"
          skip-commit-verification: true
```
.github/workflows/generate_grpc_cache.yaml (10 changes, vendored)

```diff
@@ -1,7 +1,10 @@
 name: 'generate and publish GRPC docker caches'
 
 on:
-- workflow_dispatch
+  workflow_dispatch:
+  push:
+    branches:
+      - master
 
 concurrency:
   group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -80,11 +83,12 @@ jobs:
          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
          build-args: |
            GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
-            MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.58.0
+            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
+            GRPC_VERSION=v1.63.0
          context: .
          file: ./Dockerfile
+          cache-to: type=gha,ignore-error=true
          cache-from: type=gha
          target: grpc
          platforms: ${{ matrix.platforms }}
          push: false
```
.github/workflows/image_build.yml (31 changes, vendored)

```diff
@@ -201,30 +201,14 @@ jobs:
          username: ${{ secrets.quayUsername }}
          password: ${{ secrets.quayPassword }}
 
-      - name: Cache GRPC
+      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          builder: ${{ steps.buildx.outputs.name }}
          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
          # This means that even the MAKEFLAGS have to be an EXACT match.
          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
-          build-args: |
-            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
-            MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.58.0
-          context: .
-          file: ./Dockerfile
-          cache-from: type=gha
-          target: grpc
-          platforms: ${{ inputs.platforms }}
-          push: false
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-
-      - name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
+          # This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
          build-args: |
            BUILD_TYPE=${{ inputs.build-type }}
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
@@ -232,6 +216,9 @@ jobs:
            FFMPEG=${{ inputs.ffmpeg }}
            IMAGE_TYPE=${{ inputs.image-type }}
            BASE_IMAGE=${{ inputs.base-image }}
+            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
+            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
+            GRPC_VERSION=v1.63.0
            MAKEFLAGS=${{ inputs.makeflags }}
          context: .
          file: ./Dockerfile
@@ -241,14 +228,6 @@ jobs:
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
 
-      - name: Inspect image
-        if: github.event_name != 'pull_request'
-        run: |
-          docker pull localai/localai:${{ steps.meta.outputs.version }}
-          docker image inspect localai/localai:${{ steps.meta.outputs.version }}
-          docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-          docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-
       - name: Build and push AIO image
        if: inputs.aio != ''
        uses: docker/build-push-action@v5
```
.github/workflows/release.yaml (2 changes, vendored)

```diff
@@ -5,7 +5,7 @@ on:
     - pull_request
 
 env:
-  GRPC_VERSION: v1.58.0
+  GRPC_VERSION: v1.63.0
 
 permissions:
   contents: write
```
.github/workflows/test-extra.yml (22 changes, vendored)

```diff
@@ -34,7 +34,7 @@ jobs:
          sudo apt-get install -y conda
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
 
          sudo rm -rfv /usr/bin/conda || true
 
@@ -64,7 +64,7 @@ jobs:
          sudo apt-get install -y conda
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
 
          sudo rm -rfv /usr/bin/conda || true
 
@@ -95,7 +95,7 @@ jobs:
          sudo apt-get install -y conda
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
 
          sudo rm -rfv /usr/bin/conda || true
 
@@ -125,7 +125,7 @@ jobs:
          sudo apt-get install -y conda
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
 
          sudo rm -rfv /usr/bin/conda || true
 
@@ -155,7 +155,7 @@ jobs:
          sudo apt-get install -y conda
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
 
          sudo rm -rfv /usr/bin/conda || true
 
@@ -185,7 +185,7 @@ jobs:
          sudo apt-get install -y conda
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
 
          sudo rm -rfv /usr/bin/conda || true
 
@@ -217,7 +217,7 @@ jobs:
          # sudo apt-get install -y conda
          # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          # sudo apt-get install -y libopencv-dev
-          # pip install --user grpcio-tools
+          # pip install --user grpcio-tools==1.63.0
 
          # sudo rm -rfv /usr/bin/conda || true
 
@@ -289,7 +289,7 @@ jobs:
          # sudo apt-get install -y conda
          # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          # sudo apt-get install -y libopencv-dev
-          # pip install --user grpcio-tools
+          # pip install --user grpcio-tools==1.63.0
 
          # sudo rm -rfv /usr/bin/conda || true
 
@@ -322,7 +322,7 @@ jobs:
          # sudo apt-get install -y conda
          # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          # sudo apt-get install -y libopencv-dev
-          # pip install --user grpcio-tools
+          # pip install --user grpcio-tools==1.63.0
          # sudo rm -rfv /usr/bin/conda || true
      # - name: Test vllm
      #   run: |
@@ -349,7 +349,7 @@ jobs:
          sudo apt-get install -y conda
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
          sudo rm -rfv /usr/bin/conda || true
      - name: Test vall-e-x
        run: |
@@ -376,7 +376,7 @@ jobs:
          sudo apt-get update && \
          sudo apt-get install -y conda
          sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
          sudo rm -rfv /usr/bin/conda || true
 
      - name: Test coqui
```
.github/workflows/test.yml (4 changes, vendored)

```diff
@@ -10,7 +10,7 @@ on:
       - '*'
 
 env:
-  GRPC_VERSION: v1.58.0
+  GRPC_VERSION: v1.63.0
 
 concurrency:
   group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -203,7 +203,7 @@ jobs:
      - name: Dependencies
        run: |
          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
-          pip install --user grpcio-tools
+          pip install --user grpcio-tools==1.63.0
      - name: Test
        run: |
          export C_INCLUDE_PATH=/usr/local/include
```
Dockerfile (192 changes)

```diff
@@ -2,41 +2,42 @@ ARG IMAGE_TYPE=extras
 ARG BASE_IMAGE=ubuntu:22.04
 ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
 
-# extras or core
+# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
 FROM ${BASE_IMAGE} AS requirements-core
 
 USER root
 
 ARG GO_VERSION=1.21.7
-ARG BUILD_TYPE
-ARG CUDA_MAJOR_VERSION=11
-ARG CUDA_MINOR_VERSION=7
 ARG TARGETARCH
 ARG TARGETVARIANT
 
-ENV BUILD_TYPE=${BUILD_TYPE}
 ENV DEBIAN_FRONTEND=noninteractive
 ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
 
 ARG GO_TAGS="stablediffusion tinydream tts"
 
 RUN apt-get update && \
-    apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        cmake \
+        curl \
+        git \
+        python3-pip \
+        python-is-python3 \
+        unzip && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip install --upgrade pip
 
 # Install Go
 RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
-ENV PATH $PATH:/usr/local/go/bin
+ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
 
 # Install grpc compilers
-ENV PATH $PATH:/root/go/bin
 RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \
     go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
 
 # Install protobuf (the version in 22.04 is too old)
 RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
     unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
     rm protoc.zip
 
 # Install grpcio-tools (the version in 22.04 is too old)
 RUN pip install --user grpcio-tools
@@ -47,16 +48,6 @@ RUN update-ca-certificates
 RUN echo "Target Architecture: $TARGETARCH"
 RUN echo "Target Variant: $TARGETVARIANT"
 
-# CuBLAS requirements
-RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
-    apt-get install -y software-properties-common && \
-    curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
-    dpkg -i cuda-keyring_1.1-1_all.deb && \
-    rm -f cuda-keyring_1.1-1_all.deb && \
-    apt-get update && \
-    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
-    ; fi
-
 # Cuda
 ENV PATH /usr/local/cuda/bin:${PATH}
@@ -64,10 +55,12 @@ ENV PATH /usr/local/cuda/bin:${PATH}
 ENV PATH /opt/rocm/bin:${PATH}
 
 # OpenBLAS requirements and stable diffusion
-RUN apt-get install -y \
-    libopenblas-dev \
-    libopencv-dev \
-    && apt-get clean
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        libopenblas-dev \
+        libopencv-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
 # Set up OpenCV
 RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
@@ -80,56 +73,114 @@ RUN test -n "$TARGETARCH" \
 ###################################
 ###################################
 
+# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
 FROM requirements-core AS requirements-extras
 
-RUN apt install -y gpg && \
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends gpg && \
     curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
     install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
     gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
+    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
     apt-get update && \
-    apt-get install -y conda && apt-get clean
+    apt-get install -y --no-install-recommends \
+        conda && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
 ENV PATH="/root/.cargo/bin:${PATH}"
-RUN apt-get install -y python3-pip && apt-get clean
-RUN pip install --upgrade pip
 
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
-RUN apt-get install -y espeak-ng espeak && apt-get clean
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        espeak-ng \
+        espeak && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
-RUN if [ ! -e /usr/bin/python ]; then \
-    ln -s /usr/bin/python3 /usr/bin/python \
-    ; fi
-
 ###################################
 ###################################
 
+# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
+# This target will be built on top of requirements-core or requirements-extras as retermined by the IMAGE_TYPE build-arg
+FROM requirements-${IMAGE_TYPE} AS requirements-drivers
+
+ARG BUILD_TYPE
+ARG CUDA_MAJOR_VERSION=11
+ARG CUDA_MINOR_VERSION=7
+
+ENV BUILD_TYPE=${BUILD_TYPE}
+
+# CuBLAS requirements
+RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            software-properties-common && \
+        curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
+        dpkg -i cuda-keyring_1.1-1_all.deb && \
+        rm -f cuda-keyring_1.1-1_all.deb && \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+            libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/* \
+    ; fi
+
+# If we are building with clblas support, we need the libraries for the builds
+RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            libclblast-dev && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/* \
+    ; fi
+
 ###################################
 ###################################
 
+# The grpc target does one thing, it builds and installs GRPC. This is in it's own layer so that it can be effectively cached by CI.
+# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
 FROM ${GRPC_BASE_IMAGE} AS grpc
 
-ARG MAKEFLAGS
+# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
+ARG GRPC_MAKEFLAGS="-j4 -Otarget"
 ARG GRPC_VERSION=v1.58.0
 
-ENV MAKEFLAGS=${MAKEFLAGS}
+ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
 
 WORKDIR /build
 
 RUN apt-get update && \
-    apt-get install -y build-essential cmake git && \
+    apt-get install -y --no-install-recommends \
+        ca-certificates \
+        build-essential \
+        cmake \
+        git && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc
-
-WORKDIR /build/grpc/cmake/build
-
-RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
-    make
+# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
+# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
+# and running make install in the target container
+RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+    mkdir -p /build/grpc/cmake/build && \
+    cd /build/grpc/cmake/build && \
+    cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
+    make && \
+    make install && \
+    rm -rf /build
 
 ###################################
 ###################################
 
-FROM requirements-${IMAGE_TYPE} AS builder
+# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
+# Adjustments to the build process should likely be made here.
+FROM requirements-drivers AS builder
 
 ARG GO_TAGS="stablediffusion tts"
 ARG GRPC_BACKENDS
@@ -148,41 +199,36 @@ COPY . .
 COPY .git .
 RUN echo "GO_TAGS: $GO_TAGS"
 
-RUN apt-get update && \
-    apt-get install -y build-essential cmake git && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
 RUN make prepare
 
-# If we are building with clblas support, we need the libraries for the builds
-RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
-    apt-get update && \
-    apt-get install -y libclblast-dev && \
-    apt-get clean \
-    ; fi
+# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
+# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
+# here so that we can generate the grpc code for the stablediffusion build
+RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
+    unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
+    rm protoc.zip
+
+# stablediffusion does not tolerate a newer version of abseil, build it first
+RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
 
-COPY --from=grpc /build/grpc ./grpc/
-
-WORKDIR /build/grpc/cmake/build
-RUN make install
+# Install the pre-built GRPC
+COPY --from=grpc /opt/grpc /usr/local
 
 # Rebuild with defaults backends
 WORKDIR /build
 RUN make build
 
 RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
-    mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
-    touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
+        mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
+        touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
     ; fi
 
 ###################################
 ###################################
 
-FROM requirements-${IMAGE_TYPE}
+# This is the final target. The result of this target will be the image uploaded to the registry.
+# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
+FROM requirements-drivers
 
 ARG FFMPEG
 ARG BUILD_TYPE
@@ -203,21 +249,13 @@ ENV PIP_CACHE_PURGE=true
 
 # Add FFmpeg
 RUN if [ "${FFMPEG}" = "true" ]; then \
-    apt-get install -y ffmpeg && apt-get clean \
+        apt-get update && \
+        apt-get install -y --no-install-recommends \
+            ffmpeg && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/* \
     ; fi
 
-# Add OpenCL
-RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
-    apt-get update && \
-    apt-get install -y libclblast1 && \
-    apt-get clean \
-    ; fi
-
 RUN apt-get update && \
     apt-get install -y cmake git && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
 WORKDIR /build
 
 # we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
@@ -227,9 +265,9 @@ WORKDIR /build
 COPY . .
 
 COPY --from=builder /build/sources ./sources/
-COPY --from=grpc /build/grpc ./grpc/
+COPY --from=grpc /opt/grpc /usr/local
 
-RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf /build/grpc
+RUN make prepare-sources
 
 # Copy the binary
 COPY --from=builder /build/local-ai ./
```
Makefile (12 changes)

```diff
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=46e12c4692a37bdd31a0432fc5153d7d22bc7f72
+CPPLLAMA_VERSION?=6ecf3189e00a1e8e737a78b6d10e1d7006e050a2
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
 
 # whisper.cpp version
-WHISPER_CPP_VERSION?=858452d58dba3acdc3431c9bced2bb8cfd9bf418
+WHISPER_CPP_VERSION?=8fac6455ffeb0a0950a84e790ddb74f7290d33c4
 
 # bert.cpp version
 BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
@@ -25,10 +25,10 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
 PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
 
 # stablediffusion version
-STABLEDIFFUSION_VERSION?=433ea6d9b64d9d08067324a757ef07040ea29568
+STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
 
 # tinydream version
-TINYDREAM_VERSION?=22a12a4bc0ac5455856f28f3b771331a551a4293
+TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
 
 export BUILD_TYPE?=
 export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
@@ -99,7 +99,7 @@ endif
 ifeq ($(BUILD_TYPE),cublas)
 	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
 	export LLAMA_CUBLAS=1
-	export WHISPER_CUBLAS=1
+	export WHISPER_CUDA=1
 	CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda
 endif
@@ -240,7 +240,7 @@ sources/whisper.cpp:
 	cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
 
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
-	cd sources/whisper.cpp && make libwhisper.a
+	cd sources/whisper.cpp && $(MAKE) libwhisper.a
 
 get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
```
README.md

```diff
@@ -59,6 +59,8 @@
 - All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
 
 Hot topics (looking for contributors):
 
+- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
 - Backends v2: https://github.com/mudler/LocalAI/issues/1126
 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
 - Assistant API: https://github.com/mudler/LocalAI/issues/1273
@@ -89,7 +91,8 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
 - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
 - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
 - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
-- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
+- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
+- 🆕 [Reranker API](https://localai.io/features/reranker/)
 
 ## 💻 Usage
```
```diff
@@ -1,7 +1,7 @@
 name: gpt-4
 mmap: true
 parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf
+  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
 
 template:
   chat_message: |
```

```diff
@@ -1,7 +1,7 @@
 name: gpt-4
 mmap: true
 parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
 
 template:
   chat_message: |
```

```diff
@@ -2,7 +2,7 @@ name: gpt-4
 mmap: false
 f16: false
 parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
 
 template:
   chat_message: |
```
```diff
@@ -29,8 +29,8 @@ func audioToWav(src, dst string) error {
 	return nil
 }
 
-func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) {
-	res := schema.Result{}
+func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) {
+	res := schema.TranscriptionResult{}
 
 	dir, err := os.MkdirTemp("", "whisper")
 	if err != nil {
```

```diff
@@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
 	return err
 }
 
-func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) {
+func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
 	return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
 }
```
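The rename from `schema.Result` to `schema.TranscriptionResult` only changes the type's name, so callers just update their signatures. A hedged sketch of what consuming the result might look like, using local stand-in types since the real field set lives in core/schema and is not shown in this diff:

```go
package main

import "fmt"

// Stand-ins for the renamed schema types; the real definitions in
// core/schema may carry more fields than shown here.
type Segment struct {
	Id   int
	Text string
}

type TranscriptionResult struct {
	Segments []Segment
	Text     string
}

func main() {
	res := TranscriptionResult{
		Text:     "hello world",
		Segments: []Segment{{Id: 0, Text: "hello"}, {Id: 1, Text: "world"}},
	}
	fmt.Println("full text:", res.Text)
	for _, seg := range res.Segments {
		fmt.Printf("segment %d: %s\n", seg.Id, seg.Text)
	}
}
```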
```diff
@@ -41,7 +41,7 @@ dependencies:
   - filelock==3.12.4
   - frozenlist==1.4.0
   - fsspec==2023.6.0
-  - grpcio==1.59.0
+  - grpcio==1.63.0
   - huggingface-hub==0.16.4
   - idna==3.4
   - jinja2==3.1.2
```

```diff
@@ -26,7 +26,7 @@ if [ -d "/opt/intel" ]; then
     # Intel GPU: If the directory exists, we assume we are using the intel image
     # (no conda env)
     # https://github.com/intel/intel-extension-for-pytorch/issues/538
-    pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino]
+    pip install torch==2.1.0.post0 torchvision==0.16.0.post0 torchaudio==2.1.0.post0 intel-extension-for-pytorch==2.1.20+xpu oneccl_bind_pt==2.1.200+xpu intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 fi
 
 # If we didn't skip conda, activate the environment
```

```diff
@@ -47,7 +47,7 @@ dependencies:
   - frozenlist==1.4.0
   - fsspec==2023.6.0
   - funcy==2.0
-  - grpcio==1.59.0
+  - grpcio==1.63.0
   - huggingface-hub
   - idna==3.4
   - jinja2==3.1.2
```

```diff
@@ -48,7 +48,7 @@ dependencies:
   - frozenlist==1.4.0
   - fsspec==2023.6.0
   - funcy==2.0
-  - grpcio==1.59.0
+  - grpcio==1.63.0
   - huggingface-hub
   - idna==3.4
   - jinja2==3.1.2
```

```diff
@@ -47,7 +47,7 @@ dependencies:
   - frozenlist==1.4.0
   - fsspec==2023.6.0
   - funcy==2.0
-  - grpcio==1.59.0
+  - grpcio==1.63.0
   - huggingface-hub
   - humanfriendly==10.0
   - idna==3.4
```

```diff
@@ -34,7 +34,7 @@ dependencies:
   - diffusers==0.24.0
   - filelock==3.12.4
   - fsspec==2023.9.2
-  - grpcio==1.59.0
+  - grpcio==1.63.0
   - huggingface-hub>=0.19.4
   - idna==3.4
   - importlib-metadata==6.8.0
```

```diff
@@ -32,7 +32,7 @@ dependencies:
   - diffusers==0.24.0
   - filelock==3.12.4
   - fsspec==2023.9.2
-  - grpcio==1.59.0
+  - grpcio==1.63.0
   - huggingface-hub>=0.19.4
   - idna==3.4
   - importlib-metadata==6.8.0
```

```diff
@@ -31,8 +31,8 @@ if [ -d "/opt/intel" ]; then
         --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 
     pip install google-api-python-client \
-                grpcio \
-                grpcio-tools \
+                grpcio==1.63.0 \
+                grpcio-tools==1.63.0 \
                 diffusers==0.24.0 \
                 transformers>=4.25.1 \
                 accelerate \
```

```diff
@@ -27,7 +27,7 @@ dependencies:
   - pip:
     - filelock==3.12.4
     - fsspec==2023.9.2
-    - grpcio==1.59.0
+    - grpcio==1.63.0
     - jinja2==3.1.2
     - markupsafe==2.1.3
     - mpmath==1.3.0
```

```diff
@@ -27,7 +27,7 @@ dependencies:
   - pip:
     - filelock==3.12.4
     - fsspec==2023.9.2
-    - grpcio==1.59.0
+    - grpcio==1.63.0
     - markupsafe==2.1.3
     - mpmath==1.3.0
     - networkx==3.1
```

```diff
@@ -26,7 +26,7 @@ dependencies:
   - zlib=1.2.13=h5eee18b_0
   - pip:
     - accelerate>=0.11.0
-    - grpcio==1.59.0
+    - grpcio==1.63.0
     - numpy==1.26.0
     - nvidia-cublas-cu12==12.1.3.1
     - nvidia-cuda-cupti-cu12==12.1.105
```

```diff
@@ -27,7 +27,7 @@ dependencies:
   - pip:
     - accelerate>=0.11.0
     - numpy==1.26.0
-    - grpcio==1.59.0
+    - grpcio==1.63.0
     - torch==2.1.0
     - transformers>=4.34.0
     - descript-audio-codec
```
```diff
@@ -89,8 +89,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         quantization = None
 
         if self.CUDA:
-            if request.Device:
-                device_map=request.Device
+            if request.MainGPU:
+                device_map=request.MainGPU
             else:
                 device_map="cuda:0"
             if request.Quantization == "bnb_4bit":
@@ -143,14 +143,37 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             from optimum.intel.openvino import OVModelForCausalLM
             from openvino.runtime import Core
 
-            if "GPU" in Core().available_devices:
-                device_map="GPU"
+            if request.MainGPU:
+                device_map=request.MainGPU
             else:
-                device_map="CPU"
+                device_map="AUTO"
+                devices = Core().available_devices
+                if "GPU" in " ".join(devices):
+                    device_map="AUTO:GPU"
 
             self.model = OVModelForCausalLM.from_pretrained(model_name,
                                                             compile=True,
                                                             trust_remote_code=request.TrustRemoteCode,
-                                                            ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
+                                                            ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT","GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"},
                                                             device=device_map)
             self.OV = True
+        elif request.Type == "OVModelForFeatureExtraction":
+            from optimum.intel.openvino import OVModelForFeatureExtraction
+            from openvino.runtime import Core
+
+            if request.MainGPU:
+                device_map=request.MainGPU
+            else:
+                device_map="AUTO"
+                devices = Core().available_devices
+                if "GPU" in " ".join(devices):
+                    device_map="AUTO:GPU"
+
+            self.model = OVModelForFeatureExtraction.from_pretrained(model_name,
+                                                                     compile=True,
+                                                                     trust_remote_code=request.TrustRemoteCode,
+                                                                     ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT", "GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"},
+                                                                     export=True,
+                                                                     device=device_map)
+            self.OV = True
         else:
@@ -356,4 +379,4 @@ if __name__ == "__main__":
     )
     args = parser.parse_args()
 
-    asyncio.run(serve(args.addr))
+    asyncio.run(serve(args.addr))
```
```diff
@@ -42,7 +42,7 @@ dependencies:
   - future==0.18.3
   - gradio==3.47.1
   - gradio-client==0.6.0
-  - grpcio==1.59.0
+  - grpcio==1.63.0
   - h11==0.14.0
   - httpcore==0.18.0
   - httpx==0.25.0
```
core/application.go (new file, 39 lines)

```go
package core

import (
	"github.com/go-skynet/LocalAI/core/config"
	"github.com/go-skynet/LocalAI/core/services"
	"github.com/go-skynet/LocalAI/pkg/model"
)

// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
// Perhaps a proper DI system is worth it in the future, but for now keep things simple.
type Application struct {

	// Application-Level Config
	ApplicationConfig *config.ApplicationConfig
	// ApplicationState *ApplicationState

	// Core Low-Level Services
	BackendConfigLoader *config.BackendConfigLoader
	ModelLoader         *model.ModelLoader

	// Backend Services
	// EmbeddingsBackendService *backend.EmbeddingsBackendService
	// ImageGenerationBackendService *backend.ImageGenerationBackendService
	// LLMBackendService *backend.LLMBackendService
	// TranscriptionBackendService *backend.TranscriptionBackendService
	// TextToSpeechBackendService *backend.TextToSpeechBackendService

	// LocalAI System Services
	BackendMonitorService *services.BackendMonitorService
	GalleryService        *services.GalleryService
	ListModelsService     *services.ListModelsService
	LocalAIMetricsService *services.LocalAIMetricsService
	// OpenAIService *services.OpenAIService
}

// TODO [NEXT PR?]: Break up ApplicationConfig.
// Migrate over stuff that is not set via config at all - especially runtime stuff
type ApplicationState struct {
}
```
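A sketch of how this struct might be wired up at startup, using only constructors that appear elsewhere in this changeset (`config.NewBackendConfigLoader`, `model.NewModelLoader`); the helper itself is hypothetical and the service fields are left unset:

```go
// newApplication is a hypothetical constructor, not part of this diff:
// it shows the plumbing the Application struct is meant to centralize.
func newApplication(appConfig *config.ApplicationConfig) *Application {
	return &Application{
		ApplicationConfig:   appConfig,
		BackendConfigLoader: config.NewBackendConfigLoader(),
		ModelLoader:         model.NewModelLoader(appConfig.ModelPath),
	}
}
```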
```diff
@@ -11,7 +11,7 @@ import (
 	model "github.com/go-skynet/LocalAI/pkg/model"
 )
 
-func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.Result, error) {
+func ModelTranscription(audio, language string, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
 
 	opts := modelOpts(backendConfig, appConfig, []model.Option{
 		model.WithBackendString(model.WhisperBackend),
```
```diff
@@ -64,7 +64,11 @@ func (mi *ModelsInstall) Run(ctx *Context) error {
 		progressbar.OptionClearOnFinish(),
 	)
 	progressCallback := func(fileName string, current string, total string, percentage float64) {
-		progressBar.Set(int(percentage * 10))
+		v := int(percentage * 10)
+		err := progressBar.Set(v)
+		if err != nil {
+			log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar")
+		}
 	}
 	err := gallery.InstallModelFromGallery(galleries, modelName, mi.ModelsPath, gallery.GalleryModel{}, progressCallback)
 	if err != nil {
```
```diff
@@ -42,7 +42,7 @@ type RunCMD struct {
 	CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
 	UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
 	APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
-	DisableWelcome bool `env:"LOCALAI_DISABLE_WELCOME,DISABLE_WELCOME" default:"false" help:"Disable welcome pages" group:"api"`
+	DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
 
 	ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
 	SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
@@ -84,8 +84,8 @@ func (r *RunCMD) Run(ctx *Context) error {
 	idleWatchDog := r.EnableWatchdogIdle
 	busyWatchDog := r.EnableWatchdogBusy
 
-	if r.DisableWelcome {
-		opts = append(opts, config.DisableWelcomePage)
+	if r.DisableWebUI {
+		opts = append(opts, config.DisableWebUI)
 	}
 
 	if idleWatchDog || busyWatchDog {
```
```diff
@@ -8,6 +8,7 @@ import (
 	"github.com/go-skynet/LocalAI/core/backend"
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/rs/zerolog/log"
 )
 
 type TranscriptCMD struct {
@@ -41,7 +42,12 @@ func (t *TranscriptCMD) Run(ctx *Context) error {
 
 	c.Threads = &t.Threads
 
-	defer ml.StopAllGRPC()
+	defer func() {
+		err := ml.StopAllGRPC()
+		if err != nil {
+			log.Error().Err(err).Msg("unable to stop all grpc processes")
+		}
+	}()
 
 	tr, err := backend.ModelTranscription(t.Filename, t.Language, ml, c, opts)
 	if err != nil {
```
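`defer ml.StopAllGRPC()` silently discarded the method's error return; wrapping the call in a deferred closure lets the error be inspected and logged. The same pattern in isolation, as a minimal sketch with a stand-in cleanup function:

```go
package main

import "log"

// stopAll is a stand-in for a cleanup call that can fail,
// like ml.StopAllGRPC() in the diff above.
func stopAll() error { return nil }

func run() error {
	// `defer stopAll()` would drop the error on the floor.
	// Deferring a closure instead lets us log a failed cleanup.
	defer func() {
		if err := stopAll(); err != nil {
			log.Printf("unable to stop all grpc processes: %v", err)
		}
	}()

	// ... do work ...
	return nil
}

func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}
```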
```diff
@@ -10,6 +10,7 @@ import (
 	"github.com/go-skynet/LocalAI/core/backend"
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/rs/zerolog/log"
 )
 
 type TTSCMD struct {
@@ -40,7 +41,12 @@ func (t *TTSCMD) Run(ctx *Context) error {
 	}
 	ml := model.NewModelLoader(opts.ModelPath)
 
-	defer ml.StopAllGRPC()
+	defer func() {
+		err := ml.StopAllGRPC()
+		if err != nil {
+			log.Error().Err(err).Msg("unable to stop all grpc processes")
+		}
+	}()
 
 	options := config.BackendConfig{}
 	options.SetDefaults()
```
```diff
@@ -15,7 +15,7 @@ type ApplicationConfig struct {
 	ConfigFile string
 	ModelPath string
 	UploadLimitMB, Threads, ContextSize int
-	DisableWelcomePage bool
+	DisableWebUI bool
 	F16 bool
 	Debug bool
 	ImageDir string
@@ -107,8 +107,8 @@ var EnableWatchDogBusyCheck = func(o *ApplicationConfig) {
 	o.WatchDogBusy = true
 }
 
-var DisableWelcomePage = func(o *ApplicationConfig) {
-	o.DisableWelcomePage = true
+var DisableWebUI = func(o *ApplicationConfig) {
+	o.DisableWebUI = true
 }
 
 func SetWatchDogBusyTimeout(t time.Duration) AppOption {
```
```diff
@@ -1,23 +1,12 @@
 package config
 
 import (
-	"errors"
-	"fmt"
-	"io/fs"
 	"os"
-	"path/filepath"
-	"sort"
-	"strings"
-	"sync"
 
 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/pkg/downloader"
 	"github.com/go-skynet/LocalAI/pkg/functions"
 	"github.com/go-skynet/LocalAI/pkg/utils"
-	"github.com/rs/zerolog/log"
-	"gopkg.in/yaml.v3"
-
-	"github.com/charmbracelet/glamour"
 )
 
 const (
```
```diff
@@ -140,7 +129,7 @@ type LLMConfig struct {
 	EnforceEager bool `yaml:"enforce_eager"` // vLLM
 	SwapSpace int `yaml:"swap_space"` // vLLM
 	MaxModelLen int `yaml:"max_model_len"` // vLLM
-	TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
+	TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
 	MMProj string `yaml:"mmproj"`
 
 	RopeScaling string `yaml:"rope_scaling"`
```
```diff
@@ -184,6 +173,36 @@ func (c *BackendConfig) ShouldCallSpecificFunction() bool {
 	return len(c.functionCallNameString) > 0
 }
 
+// MMProjFileName returns the filename of the MMProj file
+// If the MMProj is a URL, it will return the MD5 of the URL which is the filename
+func (c *BackendConfig) MMProjFileName() string {
+	modelURL := downloader.ConvertURL(c.MMProj)
+	if downloader.LooksLikeURL(modelURL) {
+		return utils.MD5(modelURL)
+	}
+
+	return c.MMProj
+}
+
+func (c *BackendConfig) IsMMProjURL() bool {
+	return downloader.LooksLikeURL(downloader.ConvertURL(c.MMProj))
+}
+
+func (c *BackendConfig) IsModelURL() bool {
+	return downloader.LooksLikeURL(downloader.ConvertURL(c.Model))
+}
+
+// ModelFileName returns the filename of the model
+// If the model is a URL, it will return the MD5 of the URL which is the filename
+func (c *BackendConfig) ModelFileName() string {
+	modelURL := downloader.ConvertURL(c.Model)
+	if downloader.LooksLikeURL(modelURL) {
+		return utils.MD5(modelURL)
+	}
+
+	return c.Model
+}
+
 func (c *BackendConfig) FunctionToCall() string {
 	if c.functionCallNameString != "" &&
 		c.functionCallNameString != "none" && c.functionCallNameString != "auto" {
```
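Remote model references are cached on disk under the MD5 hash of their URL, so `ModelFileName` resolves either to the literal filename or to that hash. A minimal standalone sketch of the same scheme, using the standard library directly rather than LocalAI's `utils`/`downloader` helpers (the URL check below is a simplification of `downloader.LooksLikeURL`):

```go
package main

import (
	"crypto/md5"
	"fmt"
	"strings"
)

// md5Hex mirrors what a helper like utils.MD5 presumably returns:
// the hex digest of the input string.
func md5Hex(s string) string {
	return fmt.Sprintf("%x", md5.Sum([]byte(s)))
}

// onDiskName maps a model reference to the filename used in the
// models directory: URLs are cached under their MD5, local names
// are used as-is.
func onDiskName(model string) string {
	if strings.HasPrefix(model, "http://") || strings.HasPrefix(model, "https://") {
		return md5Hex(model)
	}
	return model
}

func main() {
	fmt.Println(onDiskName("ggml-whisper.bin"))
	fmt.Println(onDiskName("https://example.com/models/some-model.gguf"))
}
```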
```diff
@@ -238,7 +257,13 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 
 	if cfg.MMap == nil {
 		// MMap is enabled by default
-		cfg.MMap = &trueV
+
+		// Only exception is for Intel GPUs
+		if os.Getenv("XPU") != "" {
+			cfg.MMap = &falseV
+		} else {
+			cfg.MMap = &trueV
+		}
 	}
 
 	if cfg.MMlock == nil {
```
```diff
@@ -307,287 +332,3 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 		cfg.Debug = &trueV
 	}
 }
-
-////// Config Loader ////////
-
-type BackendConfigLoader struct {
-	configs map[string]BackendConfig
-	sync.Mutex
-}
-
-type LoadOptions struct {
-	debug            bool
-	threads, ctxSize int
-	f16              bool
-}
-
-func LoadOptionDebug(debug bool) ConfigLoaderOption {
-	return func(o *LoadOptions) {
-		o.debug = debug
-	}
-}
-
-func LoadOptionThreads(threads int) ConfigLoaderOption {
-	return func(o *LoadOptions) {
-		o.threads = threads
-	}
-}
-
-func LoadOptionContextSize(ctxSize int) ConfigLoaderOption {
-	return func(o *LoadOptions) {
-		o.ctxSize = ctxSize
-	}
-}
-
-func LoadOptionF16(f16 bool) ConfigLoaderOption {
-	return func(o *LoadOptions) {
-		o.f16 = f16
-	}
-}
-
-type ConfigLoaderOption func(*LoadOptions)
-
-func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
-	for _, l := range options {
-		l(lo)
-	}
-}
-
-// Load a config file for a model
-func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
-
-	// Load a config file if present after the model name
-	cfg := &BackendConfig{
-		PredictionOptions: schema.PredictionOptions{
-			Model: modelName,
-		},
-	}
-
-	cfgExisting, exists := cl.GetBackendConfig(modelName)
-	if exists {
-		cfg = &cfgExisting
-	} else {
-		// Try loading a model config file
-		modelConfig := filepath.Join(modelPath, modelName+".yaml")
-		if _, err := os.Stat(modelConfig); err == nil {
-			if err := cl.LoadBackendConfig(
-				modelConfig, opts...,
-			); err != nil {
-				return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
-			}
-			cfgExisting, exists = cl.GetBackendConfig(modelName)
-			if exists {
-				cfg = &cfgExisting
-			}
-		}
-	}
-
-	cfg.SetDefaults(opts...)
-
-	return cfg, nil
-}
-
-func NewBackendConfigLoader() *BackendConfigLoader {
-	return &BackendConfigLoader{
-		configs: make(map[string]BackendConfig),
-	}
-}
-
-func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
-	c := &[]*BackendConfig{}
-	f, err := os.ReadFile(file)
-	if err != nil {
-		return nil, fmt.Errorf("cannot read config file: %w", err)
-	}
-	if err := yaml.Unmarshal(f, c); err != nil {
-		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
-	}
-
-	for _, cc := range *c {
-		cc.SetDefaults(opts...)
-	}
-
-	return *c, nil
-}
-
-func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
-	lo := &LoadOptions{}
-	lo.Apply(opts...)
-
-	c := &BackendConfig{}
-	f, err := os.ReadFile(file)
-	if err != nil {
-		return nil, fmt.Errorf("cannot read config file: %w", err)
-	}
-	if err := yaml.Unmarshal(f, c); err != nil {
-		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
-	}
-
-	c.SetDefaults(opts...)
-	return c, nil
-}
-
-func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error {
-	cm.Lock()
-	defer cm.Unlock()
-	c, err := ReadBackendConfigFile(file, opts...)
-	if err != nil {
-		return fmt.Errorf("cannot load config file: %w", err)
-	}
-
-	for _, cc := range c {
-		cm.configs[cc.Name] = *cc
-	}
-	return nil
-}
-
-func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error {
-	cl.Lock()
-	defer cl.Unlock()
-	c, err := ReadBackendConfig(file, opts...)
-	if err != nil {
-		return fmt.Errorf("cannot read config file: %w", err)
-	}
-
-	cl.configs[c.Name] = *c
-	return nil
-}
-
-func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
-	cl.Lock()
-	defer cl.Unlock()
-	v, exists := cl.configs[m]
-	return v, exists
-}
-
-func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
-	cl.Lock()
-	defer cl.Unlock()
-	var res []BackendConfig
-	for _, v := range cl.configs {
-		res = append(res, v)
-	}
-
-	sort.SliceStable(res, func(i, j int) bool {
-		return res[i].Name < res[j].Name
-	})
-
-	return res
-}
-
-func (cl *BackendConfigLoader) ListBackendConfigs() []string {
-	cl.Lock()
-	defer cl.Unlock()
-	var res []string
-	for k := range cl.configs {
-		res = append(res, k)
-	}
-	return res
-}
-
-// Preload prepare models if they are not local but url or huggingface repositories
-func (cl *BackendConfigLoader) Preload(modelPath string) error {
-	cl.Lock()
-	defer cl.Unlock()
-
-	status := func(fileName, current, total string, percent float64) {
-		utils.DisplayDownloadFunction(fileName, current, total, percent)
-	}
-
-	log.Info().Msgf("Preloading models from %s", modelPath)
-
-	renderMode := "dark"
-	if os.Getenv("COLOR") != "" {
-		renderMode = os.Getenv("COLOR")
-	}
-
-	glamText := func(t string) {
-		out, err := glamour.Render(t, renderMode)
-		if err == nil && os.Getenv("NO_COLOR") == "" {
-			fmt.Println(out)
-		} else {
-			fmt.Println(t)
-		}
-	}
-
-	for i, config := range cl.configs {
-
-		// Download files and verify their SHA
-		for i, file := range config.DownloadFiles {
-			log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
-
-			if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
-				return err
-			}
-			// Create file path
-			filePath := filepath.Join(modelPath, file.Filename)
-
-			if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil {
-				return err
-			}
-		}
-
-		modelURL := config.PredictionOptions.Model
-		modelURL = downloader.ConvertURL(modelURL)
-
-		if downloader.LooksLikeURL(modelURL) {
-			// md5 of model name
-			md5Name := utils.MD5(modelURL)
-
-			// check if file exists
-			if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
-				err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", 0, 0, status)
-				if err != nil {
-					return err
-				}
-			}
-
-			cc := cl.configs[i]
-			c := &cc
-			c.PredictionOptions.Model = md5Name
-			cl.configs[i] = *c
-		}
-		if cl.configs[i].Name != "" {
-			glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name))
-		}
-		if cl.configs[i].Description != "" {
-			//glamText("**Description**")
-			glamText(cl.configs[i].Description)
-		}
-		if cl.configs[i].Usage != "" {
-			//glamText("**Usage**")
-			glamText(cl.configs[i].Usage)
-		}
-	}
-	return nil
-}
-
-// LoadBackendConfigsFromPath reads all the configurations of the models from a path
-// (non-recursive)
-func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
-	cm.Lock()
-	defer cm.Unlock()
-	entries, err := os.ReadDir(path)
-	if err != nil {
-		return err
-	}
-	files := make([]fs.FileInfo, 0, len(entries))
-	for _, entry := range entries {
-		info, err := entry.Info()
-		if err != nil {
-			return err
-		}
-		files = append(files, info)
-	}
-	for _, file := range files {
-		// Skip templates, YAML and .keep files
-		if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
-			continue
-		}
-		c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...)
-		if err == nil {
-			cm.configs[c.Name] = *c
-		}
-	}
-
-	return nil
-}
```
core/config/backend_config_loader.go (new file, 317 lines)
@@ -0,0 +1,317 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/charmbracelet/glamour"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/pkg/downloader"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/rs/zerolog/log"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
type BackendConfigLoader struct {
|
||||
configs map[string]BackendConfig
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
type LoadOptions struct {
|
||||
debug bool
|
||||
threads, ctxSize int
|
||||
f16 bool
|
||||
}
|
||||
|
||||
func LoadOptionDebug(debug bool) ConfigLoaderOption {
|
||||
return func(o *LoadOptions) {
|
||||
o.debug = debug
|
||||
}
|
||||
}
|
||||
|
||||
func LoadOptionThreads(threads int) ConfigLoaderOption {
|
||||
return func(o *LoadOptions) {
|
||||
o.threads = threads
|
||||
}
|
||||
}
|
||||
|
||||
func LoadOptionContextSize(ctxSize int) ConfigLoaderOption {
|
||||
return func(o *LoadOptions) {
|
||||
o.ctxSize = ctxSize
|
||||
}
|
||||
}
|
||||
|
||||
func LoadOptionF16(f16 bool) ConfigLoaderOption {
|
||||
return func(o *LoadOptions) {
|
||||
o.f16 = f16
|
||||
}
|
||||
}
|
||||
|
||||
type ConfigLoaderOption func(*LoadOptions)
|
||||
|
||||
func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
|
||||
for _, l := range options {
|
||||
l(lo)
|
||||
}
|
||||
}
|
||||
|
||||
// Load a config file for a model
func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {

	// Load a config file if present after the model name
	cfg := &BackendConfig{
		PredictionOptions: schema.PredictionOptions{
			Model: modelName,
		},
	}

	cfgExisting, exists := cl.GetBackendConfig(modelName)
	if exists {
		cfg = &cfgExisting
	} else {
		// Try loading a model config file
		modelConfig := filepath.Join(modelPath, modelName+".yaml")
		if _, err := os.Stat(modelConfig); err == nil {
			if err := cl.LoadBackendConfig(
				modelConfig, opts...,
			); err != nil {
				return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
			}
			cfgExisting, exists = cl.GetBackendConfig(modelName)
			if exists {
				cfg = &cfgExisting
			}
		}
	}

	cfg.SetDefaults(opts...)

	return cfg, nil
}

func NewBackendConfigLoader() *BackendConfigLoader {
	return &BackendConfigLoader{
		configs: make(map[string]BackendConfig),
	}
}

func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
	c := &[]*BackendConfig{}
	f, err := os.ReadFile(file)
	if err != nil {
		return nil, fmt.Errorf("cannot read config file: %w", err)
	}
	if err := yaml.Unmarshal(f, c); err != nil {
		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
	}

	for _, cc := range *c {
		cc.SetDefaults(opts...)
	}

	return *c, nil
}

func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
	lo := &LoadOptions{}
	lo.Apply(opts...)

	c := &BackendConfig{}
	f, err := os.ReadFile(file)
	if err != nil {
		return nil, fmt.Errorf("cannot read config file: %w", err)
	}
	if err := yaml.Unmarshal(f, c); err != nil {
		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
	}

	c.SetDefaults(opts...)
	return c, nil
}

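Note the two readers expect different YAML shapes: ReadBackendConfigFile unmarshals a YAML list of backend configs (one file declaring several models), while ReadBackendConfig expects a single document. A minimal sketch exercising the list variant; only `name` is certain from this diff, and the `parameters.model` key mirroring PredictionOptions.Model is an assumption:

	package main

	import (
		"fmt"
		"log"
		"os"
		"path/filepath"

		"github.com/go-skynet/LocalAI/core/config"
	)

	func main() {
		// Two illustrative entries; `parameters.model` is assumed to be the
		// YAML key for PredictionOptions.Model.
		raw := []byte("- name: gpt-a\n  parameters:\n    model: model-a.gguf\n- name: gpt-b\n  parameters:\n    model: model-b.gguf\n")
		tmp := filepath.Join(os.TempDir(), "models.yaml")
		if err := os.WriteFile(tmp, raw, 0o600); err != nil {
			log.Fatal(err)
		}
		cfgs, err := config.ReadBackendConfigFile(tmp)
		if err != nil {
			log.Fatal(err)
		}
		for _, c := range cfgs {
			fmt.Println(c.Name) // gpt-a, gpt-b
		}
	}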
func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error {
	cm.Lock()
	defer cm.Unlock()
	c, err := ReadBackendConfigFile(file, opts...)
	if err != nil {
		return fmt.Errorf("cannot load config file: %w", err)
	}

	for _, cc := range c {
		cm.configs[cc.Name] = *cc
	}
	return nil
}

func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error {
	cl.Lock()
	defer cl.Unlock()
	c, err := ReadBackendConfig(file, opts...)
	if err != nil {
		return fmt.Errorf("cannot read config file: %w", err)
	}

	cl.configs[c.Name] = *c
	return nil
}

func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
	cl.Lock()
	defer cl.Unlock()
	v, exists := cl.configs[m]
	return v, exists
}

func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
	cl.Lock()
	defer cl.Unlock()
	var res []BackendConfig
	for _, v := range cl.configs {
		res = append(res, v)
	}

	sort.SliceStable(res, func(i, j int) bool {
		return res[i].Name < res[j].Name
	})

	return res
}

func (cl *BackendConfigLoader) ListBackendConfigs() []string {
	cl.Lock()
	defer cl.Unlock()
	var res []string
	for k := range cl.configs {
		res = append(res, k)
	}
	return res
}

// Preload prepares models that are not local, e.g. URLs or Hugging Face repositories
func (cl *BackendConfigLoader) Preload(modelPath string) error {
	cl.Lock()
	defer cl.Unlock()

	status := func(fileName, current, total string, percent float64) {
		utils.DisplayDownloadFunction(fileName, current, total, percent)
	}

	log.Info().Msgf("Preloading models from %s", modelPath)

	renderMode := "dark"
	if os.Getenv("COLOR") != "" {
		renderMode = os.Getenv("COLOR")
	}

	glamText := func(t string) {
		out, err := glamour.Render(t, renderMode)
		if err == nil && os.Getenv("NO_COLOR") == "" {
			fmt.Println(out)
		} else {
			fmt.Println(t)
		}
	}

	for i, config := range cl.configs {

		// Download files and verify their SHA
		for i, file := range config.DownloadFiles {
			log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)

			if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
				return err
			}
			// Create file path
			filePath := filepath.Join(modelPath, file.Filename)

			if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil {
				return err
			}
		}

		// If the model is a URL, expand it, and download the file
		if config.IsModelURL() {
			modelFileName := config.ModelFileName()
			modelURL := downloader.ConvertURL(config.Model)
			// check if file exists
			if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) {
				err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status)
				if err != nil {
					return err
				}
			}

			cc := cl.configs[i]
			c := &cc
			c.PredictionOptions.Model = modelFileName
			cl.configs[i] = *c
		}

		if config.IsMMProjURL() {
			modelFileName := config.MMProjFileName()
			modelURL := downloader.ConvertURL(config.MMProj)
			// check if file exists
			if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) {
				err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status)
				if err != nil {
					return err
				}
			}

			cc := cl.configs[i]
			c := &cc
			c.MMProj = modelFileName
			cl.configs[i] = *c
		}

		if cl.configs[i].Name != "" {
			glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name))
		}
		if cl.configs[i].Description != "" {
			//glamText("**Description**")
			glamText(cl.configs[i].Description)
		}
		if cl.configs[i].Usage != "" {
			//glamText("**Usage**")
			glamText(cl.configs[i].Usage)
		}
	}
	return nil
}

// LoadBackendConfigsFromPath reads all the configurations of the models from a path
// (non-recursive)
func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
	cm.Lock()
	defer cm.Unlock()
	entries, err := os.ReadDir(path)
	if err != nil {
		return err
	}
	files := make([]fs.FileInfo, 0, len(entries))
	for _, entry := range entries {
		info, err := entry.Info()
		if err != nil {
			return err
		}
		files = append(files, info)
	}
	for _, file := range files {
		// Skip anything that is not a YAML config, as well as hidden files
		if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") ||
			strings.HasPrefix(file.Name(), ".") {
			continue
		}
		c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...)
		if err == nil {
			cm.configs[c.Name] = *c
		}
	}

	return nil
}
@@ -1,9 +1,9 @@
package http

import (
	"encoding/json"
	"embed"
	"errors"
	"os"
	"net/http"
	"strings"

	"github.com/go-skynet/LocalAI/pkg/utils"
@@ -20,6 +20,7 @@ import (
	"github.com/gofiber/contrib/fiberzerolog"
	"github.com/gofiber/fiber/v2"
	"github.com/gofiber/fiber/v2/middleware/cors"
	"github.com/gofiber/fiber/v2/middleware/filesystem"
	"github.com/gofiber/fiber/v2/middleware/recover"

	// swagger handler
@@ -44,6 +45,11 @@ func readAuthHeader(c *fiber.Ctx) string {
	return authHeader
}

// Embed a directory
//
//go:embed static/*
var embedDirStatic embed.FS

// @title LocalAI API
// @version 2.0.0
// @description The LocalAI Rest API.
@@ -124,20 +130,6 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
			return c.Next()
		}

		// Check for api_keys.json file
		fileContent, err := os.ReadFile("api_keys.json")
		if err == nil {
			// Parse JSON content from the file
			var fileKeys []string
			err := json.Unmarshal(fileContent, &fileKeys)
			if err != nil {
				return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
			}

			// Add file keys to options.ApiKeys
			appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
		}

		if len(appConfig.ApiKeys) == 0 {
			return c.Next()
		}
|
||||
app.Use(c)
|
||||
}
|
||||
|
||||
// Make sure directories exists
|
||||
os.MkdirAll(appConfig.ImageDir, 0750)
|
||||
os.MkdirAll(appConfig.AudioDir, 0750)
|
||||
os.MkdirAll(appConfig.UploadDir, 0750)
|
||||
os.MkdirAll(appConfig.ConfigsDir, 0750)
|
||||
os.MkdirAll(appConfig.ModelPath, 0750)
|
||||
|
||||
// Load config jsons
|
||||
utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
|
||||
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
|
||||
@@ -192,10 +177,17 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
|
||||
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth)
|
||||
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth)
|
||||
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth)
|
||||
routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth)
|
||||
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth)
|
||||
if !appConfig.DisableWebUI {
|
||||
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth)
|
||||
}
|
||||
routes.RegisterJINARoutes(app, cl, ml, appConfig, auth)
|
||||
|
||||
app.Use("/static", filesystem.New(filesystem.Config{
|
||||
Root: http.FS(embedDirStatic),
|
||||
PathPrefix: "static",
|
||||
Browse: true,
|
||||
}))
|
||||
|
||||
// Define a custom 404 handler
|
||||
// Note: keep this at the bottom!
|
||||
app.Use(notFoundHandler)
|
||||
|
||||
@@ -6,13 +6,14 @@ import (
	"github.com/chasefleming/elem-go"
	"github.com/chasefleming/elem-go/attrs"
	"github.com/go-skynet/LocalAI/pkg/gallery"
	"github.com/go-skynet/LocalAI/pkg/xsync"
)

const (
	NoImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
)

func DoneProgress(uid string) string {
func DoneProgress(uid, text string) string {
	return elem.Div(
		attrs.Props{},
		elem.H3(
@@ -22,7 +23,7 @@ func DoneProgress(uid string) string {
				"tabindex":  "-1",
				"autofocus": "",
			},
			elem.Text("Installation completed"),
			elem.Text(text),
		),
	).Render()
}
@@ -59,7 +60,7 @@ func ProgressBar(progress string) string {
	).Render()
}

func StartProgressBar(uid, progress string) string {
func StartProgressBar(uid, progress, text string) string {
	if progress == "" {
		progress = "0"
	}
@@ -76,7 +77,7 @@ func StartProgressBar(uid, progress string) string {
				"tabindex":  "-1",
				"autofocus": "",
			},
			elem.Text("Installing"),
			elem.Text(text),
			// This is a simple example of how to use the htmx library to create a progress bar that updates every 600ms.
			elem.Div(attrs.Props{
				"hx-get": "/browse/job/progress/" + uid,
@@ -102,22 +103,43 @@ func cardSpan(text, icon string) elem.Node {
	)
}

func ListModels(models []*gallery.GalleryModel) string {
func ListModels(models []*gallery.GalleryModel, installing *xsync.SyncedMap[string, string]) string {
	//StartProgressBar(uid, "0")
	modelsElements := []elem.Node{}
	span := func(s string) elem.Node {
		return elem.Span(
	// span := func(s string) elem.Node {
	// 	return elem.Span(
	// 		attrs.Props{
	// 			"class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs",
	// 		},
	// 		elem.Text(s),
	// 	)
	// }
	deleteButton := func(m *gallery.GalleryModel) elem.Node {
		return elem.Button(
			attrs.Props{
				"class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs",
				"data-twe-ripple-init":  "",
				"data-twe-ripple-color": "light",
				"class":                 "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
				"hx-swap":               "outerHTML",
				// post the Model ID as param
				"hx-post": "/browse/delete/model/" + m.Name,
			},
			elem.Text(s),
			elem.I(
				attrs.Props{
					"class": "fa-solid fa-cancel pr-2",
				},
			),
			elem.Text("Delete"),
		)
	}

	installButton := func(m *gallery.GalleryModel) elem.Node {
		return elem.Button(
			attrs.Props{
				"data-twe-ripple-init":  "",
				"data-twe-ripple-color": "light",
				"class":                 "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
				"hx-swap":               "outerHTML",
				// post the Model ID as param
				"hx-post": "/browse/install/model/" + fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name),
			},
@@ -152,6 +174,9 @@ func ListModels(models []*gallery.GalleryModel) string {
	}

	actionDiv := func(m *gallery.GalleryModel) elem.Node {
		galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name)
		currentlyInstalling := installing.Exists(galleryID)

		nodes := []elem.Node{
			cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"),
		}
@@ -193,7 +218,20 @@ func ListModels(models []*gallery.GalleryModel) string {
				},
				nodes...,
			),
			elem.If(m.Installed, span("Installed"), installButton(m)),
			elem.If(
				currentlyInstalling,
				elem.Node( // If currently installing, show progress bar
					elem.Raw(StartProgressBar(installing.Get(galleryID), "0", "Installing")),
				), // Otherwise, show install button (if not installed) or display "Installed"
				elem.If(m.Installed,
					//elem.Node(elem.Div(
					//	attrs.Props{},
					//	span("Installed"), deleteButton(m),
					//)),
					deleteButton(m),
					installButton(m),
				),
			),
		),
	)
}

@@ -6,7 +6,7 @@ import (
	"github.com/gofiber/fiber/v2"
)

func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error {
func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {

		input := new(schema.BackendMonitorRequest)
@@ -23,7 +23,7 @@ func BackendMonitorEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error
	}
}

func BackendShutdownEndpoint(bm services.BackendMonitor) func(c *fiber.Ctx) error {
func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		input := new(schema.BackendMonitorRequest)
		// Get input data from the request body

@@ -74,6 +74,27 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
	}
}

func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		modelName := c.Params("name")

		mgs.galleryApplier.C <- gallery.GalleryOp{
			Delete:      true,
			GalleryName: modelName,
		}

		uuid, err := uuid.NewUUID()
		if err != nil {
			return err
		}

		return c.JSON(struct {
			ID        string `json:"uuid"`
			StatusURL string `json:"status"`
		}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
	}
}

func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)

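The new delete endpoint mirrors the async apply flow: it queues a GalleryOp and immediately returns a job UUID plus a status URL to poll. A minimal client sketch against the /models/delete/:name route registered below (host and model name are illustrative):

	package main

	import (
		"fmt"
		"io"
		"net/http"
	)

	func main() {
		// Illustrative values: adjust host and model name to your deployment.
		req, _ := http.NewRequest(http.MethodPost, "http://localhost:8080/models/delete/my-model", nil)
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			panic(err)
		}
		defer resp.Body.Close()
		body, _ := io.ReadAll(resp.Body)
		fmt.Println(string(body)) // {"uuid":"...","status":".../models/jobs/<uuid>"}
	}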
@@ -1,63 +1,23 @@
package openai

import (
	"regexp"

	"github.com/go-skynet/LocalAI/core/config"
	"github.com/go-skynet/LocalAI/core/schema"
	model "github.com/go-skynet/LocalAI/pkg/model"
	"github.com/go-skynet/LocalAI/core/services"
	"github.com/gofiber/fiber/v2"
)

func ListModelsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error {
func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		models, err := ml.ListModels()
		if err != nil {
			return err
		}
		var mm map[string]interface{} = map[string]interface{}{}

		dataModels := []schema.OpenAIModel{}

		var filterFn func(name string) bool
		// If blank, no filter is applied.
		filter := c.Query("filter")

		// If filter is not specified, do not filter the list by model name
		if filter == "" {
			filterFn = func(_ string) bool { return true }
		} else {
			// If filter _IS_ specified, we compile it to a regex which is used to create the filterFn
			rxp, err := regexp.Compile(filter)
			if err != nil {
				return err
			}
			filterFn = func(name string) bool {
				return rxp.MatchString(name)
			}
		}

		// By default, exclude any loose files that are already referenced by a configuration file.
		excludeConfigured := c.QueryBool("excludeConfigured", true)

		// Start with the known configurations
		for _, c := range cl.GetAllBackendConfigs() {
			if excludeConfigured {
				mm[c.Model] = nil
			}

			if filterFn(c.Name) {
				dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"})
			}
		dataModels, err := lms.ListModels(filter, excludeConfigured)
		if err != nil {
			return err
		}

		// Then iterate through the loose files:
		for _, m := range models {
			// And only adds them if they shouldn't be skipped.
			if _, exists := mm[m]; !exists && filterFn(m) {
				dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
			}
		}

		return c.JSON(struct {
			Object string               `json:"object"`
			Data   []schema.OpenAIModel `json:"data"`

|
||||
|
||||
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
|
||||
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
|
||||
app.Post("/models/delete/:name", auth, modelGalleryEndpointService.DeleteModelGalleryEndpoint())
|
||||
|
||||
app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
|
||||
app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
|
||||
app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
|
||||
@@ -50,9 +52,9 @@ func RegisterLocalAIRoutes(app *fiber.App,
|
||||
app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
|
||||
|
||||
// Experimental Backend Statistics Module
|
||||
backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now
|
||||
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor))
|
||||
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor))
|
||||
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
|
||||
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitorService))
|
||||
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitorService))
|
||||
|
||||
app.Get("/version", auth, func(c *fiber.Ctx) error {
|
||||
return c.JSON(struct {
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
	"github.com/go-skynet/LocalAI/core/config"
	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
	"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
	"github.com/go-skynet/LocalAI/core/services"
	"github.com/go-skynet/LocalAI/pkg/model"
	"github.com/gofiber/fiber/v2"
)
@@ -81,6 +82,7 @@ func RegisterOpenAIRoutes(app *fiber.App,
	}

	// models
	app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
	app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
	tmpLMS := services.NewListModelsService(ml, cl, appConfig) // TODO: once createApplication() is fully in use, reference the central instance.
	app.Get("/v1/models", auth, openai.ListModelsEndpoint(tmpLMS))
	app.Get("/models", auth, openai.ListModelsEndpoint(tmpLMS))
}

@@ -7,9 +7,13 @@ import (

	"github.com/go-skynet/LocalAI/core/config"
	"github.com/go-skynet/LocalAI/core/http/elements"
	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
	"github.com/go-skynet/LocalAI/core/services"
	"github.com/go-skynet/LocalAI/internal"
	"github.com/go-skynet/LocalAI/pkg/gallery"
	"github.com/go-skynet/LocalAI/pkg/model"
	"github.com/go-skynet/LocalAI/pkg/xsync"

	"github.com/gofiber/fiber/v2"
	"github.com/google/uuid"
)
@@ -21,13 +25,19 @@ func RegisterUIRoutes(app *fiber.App,
	galleryService *services.GalleryService,
	auth func(*fiber.Ctx) error) {

	// Show the Models page
	app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml))

	// keeps the state of models that are being installed from the UI
	var installingModels = xsync.NewSyncedMap[string, string]()

	// Show the Models page (all models)
	app.Get("/browse", auth, func(c *fiber.Ctx) error {
		models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)

		summary := fiber.Map{
			"Title":  "LocalAI - Models",
			"Models": template.HTML(elements.ListModels(models)),
			"Version":      internal.PrintableVersion(),
			"Models":       template.HTML(elements.ListModels(models, installingModels)),
			"Repositories": appConfig.Galleries,
			// "ApplicationConfig": appConfig,
		}
@@ -36,7 +46,7 @@ func RegisterUIRoutes(app *fiber.App,
		return c.Render("views/models", summary)
	})

	// HTMX: return the model details
	// Show the models, filtered from the user input
	// https://htmx.org/examples/active-search/
	app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error {
		form := struct {
@@ -58,12 +68,19 @@ func RegisterUIRoutes(app *fiber.App,
			}
		}

		return c.SendString(elements.ListModels(filteredModels))
		return c.SendString(elements.ListModels(filteredModels, installingModels))
	})

	/*

		Install routes

	*/

	// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
	// https://htmx.org/examples/progress-bar/
	app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error {
		galleryID := strings.Clone(c.Params("id")) // strings.Clone is required!
		galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!

		id, err := uuid.NewUUID()
		if err != nil {
@@ -72,6 +89,8 @@ func RegisterUIRoutes(app *fiber.App,

		uid := id.String()

		installingModels.Set(galleryID, uid)

		op := gallery.GalleryOp{
			Id:          uid,
			GalleryName: galleryID,
@@ -81,9 +100,37 @@ func RegisterUIRoutes(app *fiber.App,
			galleryService.C <- op
		}()

		return c.SendString(elements.StartProgressBar(uid, "0"))
		return c.SendString(elements.StartProgressBar(uid, "0", "Installation"))
	})

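installingModels maps a gallery ID to the UUID of its in-flight job, so a page re-render can swap the install button for a live progress bar. A minimal sketch of the xsync.SyncedMap calls this flow relies on (only Set/Get/Exists/Keys/Delete are assumed, since those are the methods used in this changeset; the IDs are illustrative):

	// Sketch of the job-tracking pattern used above.
	installing := xsync.NewSyncedMap[string, string]()

	installing.Set("localai@my-model", "job-uuid-1234") // job submitted
	if installing.Exists("localai@my-model") {          // render a progress bar instead of a button
		uid := installing.Get("localai@my-model")
		_ = uid
	}
	for _, k := range installing.Keys() { // job finished: drop the entry
		if installing.Get(k) == "job-uuid-1234" {
			installing.Delete(k)
		}
	}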
	// This route is used when the "Delete" button is pressed, we submit here a new job to the gallery service
	// https://htmx.org/examples/progress-bar/
	app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error {
		galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!

		id, err := uuid.NewUUID()
		if err != nil {
			return err
		}

		uid := id.String()

		installingModels.Set(galleryID, uid)

		op := gallery.GalleryOp{
			Id:          uid,
			Delete:      true,
			GalleryName: galleryID,
		}
		go func() {
			galleryService.C <- op
		}()

		return c.SendString(elements.StartProgressBar(uid, "0", "Deletion"))
	})

	// Display the job current progress status
	// If the job is done, we trigger the /browse/job/:uid route
	// https://htmx.org/examples/progress-bar/
	app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error {
		jobUID := c.Params("uid")
@@ -95,7 +142,7 @@ func RegisterUIRoutes(app *fiber.App,
		}

		if status.Progress == 100 {
			c.Set("HX-Trigger", "done")
			c.Set("HX-Trigger", "done") // this triggers /browse/job/:uid (which is when the job is done)
			return c.SendString(elements.ProgressBar("100"))
		}
		if status.Error != nil {
@@ -105,7 +152,122 @@ func RegisterUIRoutes(app *fiber.App,
		return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress)))
	})

	// this route is hit when the job is done, and we display the
	// final state (for now just displays "Installation completed")
	app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error {
		return c.SendString(elements.DoneProgress(c.Params("uid")))

		status := galleryService.GetStatus(c.Params("uid"))

		for _, k := range installingModels.Keys() {
			if installingModels.Get(k) == c.Params("uid") {
				installingModels.Delete(k)
			}
		}

		displayText := "Installation completed"
		if status.Deletion {
			displayText = "Deletion completed"
		}

		return c.SendString(elements.DoneProgress(c.Params("uid"), displayText))
	})

	// Show the Chat page
	app.Get("/chat/:model", auth, func(c *fiber.Ctx) error {
		backendConfigs := cl.GetAllBackendConfigs()

		summary := fiber.Map{
			"Title":        "LocalAI - Chat with " + c.Params("model"),
			"ModelsConfig": backendConfigs,
			"Model":        c.Params("model"),
			"Version":      internal.PrintableVersion(),
		}

		// Render index
		return c.Render("views/chat", summary)
	})
	app.Get("/chat/", auth, func(c *fiber.Ctx) error {

		backendConfigs := cl.GetAllBackendConfigs()

		if len(backendConfigs) == 0 {
			return c.SendString("No models available")
		}

		summary := fiber.Map{
			"Title":        "LocalAI - Chat with " + backendConfigs[0].Name,
			"ModelsConfig": backendConfigs,
			"Model":        backendConfigs[0].Name,
			"Version":      internal.PrintableVersion(),
		}

		// Render index
		return c.Render("views/chat", summary)
	})

	app.Get("/text2image/:model", auth, func(c *fiber.Ctx) error {
		backendConfigs := cl.GetAllBackendConfigs()

		summary := fiber.Map{
			"Title":        "LocalAI - Generate images with " + c.Params("model"),
			"ModelsConfig": backendConfigs,
			"Model":        c.Params("model"),
			"Version":      internal.PrintableVersion(),
		}

		// Render index
		return c.Render("views/text2image", summary)
	})

	app.Get("/text2image/", auth, func(c *fiber.Ctx) error {

		backendConfigs := cl.GetAllBackendConfigs()

		if len(backendConfigs) == 0 {
			return c.SendString("No models available")
		}

		summary := fiber.Map{
			"Title":        "LocalAI - Generate images with " + backendConfigs[0].Name,
			"ModelsConfig": backendConfigs,
			"Model":        backendConfigs[0].Name,
			"Version":      internal.PrintableVersion(),
		}

		// Render index
		return c.Render("views/text2image", summary)
	})

app.Get("/tts/:model", auth, func(c *fiber.Ctx) error {
|
||||
backendConfigs := cl.GetAllBackendConfigs()
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Generate images with " + c.Params("model"),
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": c.Params("model"),
|
||||
"Version": internal.PrintableVersion(),
|
||||
}
|
||||
|
||||
// Render index
|
||||
return c.Render("views/tts", summary)
|
||||
})
|
||||
|
||||
app.Get("/tts/", auth, func(c *fiber.Ctx) error {
|
||||
|
||||
backendConfigs := cl.GetAllBackendConfigs()
|
||||
|
||||
if len(backendConfigs) == 0 {
|
||||
return c.SendString("No models available")
|
||||
}
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Generate audio with " + backendConfigs[0].Name,
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": backendConfigs[0].Name,
|
||||
"Version": internal.PrintableVersion(),
|
||||
}
|
||||
|
||||
// Render index
|
||||
return c.Render("views/tts", summary)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1,19 +0,0 @@
package routes

import (
	"github.com/go-skynet/LocalAI/core/config"
	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
	"github.com/go-skynet/LocalAI/pkg/model"
	"github.com/gofiber/fiber/v2"
)

func RegisterPagesRoutes(app *fiber.App,
	cl *config.BackendConfigLoader,
	ml *model.ModelLoader,
	appConfig *config.ApplicationConfig,
	auth func(*fiber.Ctx) error) {

	if !appConfig.DisableWelcomePage {
		app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml))
	}
}
core/http/static/chat.js (new file, 137 lines)
@@ -0,0 +1,137 @@
/*

https://github.com/david-haerer/chatapi

MIT License

Copyright (c) 2023 David Härer
Copyright (c) 2024 Ettore Di Giacinto

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

*/
function submitKey(event) {
  event.preventDefault();
  localStorage.setItem("key", document.getElementById("apiKey").value);
  document.getElementById("apiKey").blur();
}

function submitPrompt(event) {
  event.preventDefault();

  const input = document.getElementById("input").value;
  Alpine.store("chat").add("user", input);
  document.getElementById("input").value = "";
  const key = localStorage.getItem("key");

  promptGPT(key, input);
}


async function promptGPT(key, input) {
  const model = document.getElementById("chat-model").value;
  // Set class "loader" to the element with "loader" id
  //document.getElementById("loader").classList.add("loader");
  // Make the "loader" visible
  document.getElementById("loader").style.display = "block";
  document.getElementById("input").disabled = true;
  document.getElementById('messages').scrollIntoView(false)

  // Source: https://stackoverflow.com/a/75751803/11386095
  const response = await fetch("/v1/chat/completions", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${key}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: model,
      messages: Alpine.store("chat").messages(),
      stream: true,
    }),
  });

  if (!response.ok) {
    Alpine.store("chat").add(
      "assistant",
      `<span class='error'>Error: POST /v1/chat/completions ${response.status}</span>`,
    );
    return;
  }

  const reader = response.body
    ?.pipeThrough(new TextDecoderStream())
    .getReader();

  if (!reader) {
    Alpine.store("chat").add(
      "assistant",
      `<span class='error'>Error: Failed to decode API response</span>`,
    );
    return;
  }

  while (true) {
    const { value, done } = await reader.read();
    if (done) break;
    let dataDone = false;
    const arr = value.split("\n");
    arr.forEach((data) => {
      if (data.length === 0) return;
      if (data.startsWith(":")) return;
      if (data === "data: [DONE]") {
        dataDone = true;
        return;
      }
      const token = JSON.parse(data.substring(6)).choices[0].delta.content;
      if (!token) {
        return;
      }
      hljs.highlightAll();
      Alpine.store("chat").add("assistant", token);
      document.getElementById('messages').scrollIntoView(false)
    });
    hljs.highlightAll();
    if (dataDone) break;
  }
  // Remove class "loader" from the element with "loader" id
  //document.getElementById("loader").classList.remove("loader");
  document.getElementById("loader").style.display = "none";
  // enable input
  document.getElementById("input").disabled = false;
  // scroll to the bottom of the chat
  document.getElementById('messages').scrollIntoView(false)
  // set focus to the input
  document.getElementById("input").focus();
}

document.getElementById("key").addEventListener("submit", submitKey);
document.getElementById("prompt").addEventListener("submit", submitPrompt);
document.getElementById("input").focus();

const storeKey = localStorage.getItem("key");
if (storeKey) {
  document.getElementById("apiKey").value = storeKey;
}

marked.setOptions({
  highlight: function (code) {
    return hljs.highlightAuto(code).value;
  },
});
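chat.js consumes /v1/chat/completions as a server-sent event stream: each line is `data: <json chunk>`, tokens live in choices[0].delta.content, and `data: [DONE]` terminates the stream. A hedged Go sketch of the same consumption loop (host, key, and model are illustrative, and the chunk struct only models the fields the browser code actually reads):

	package main

	import (
		"bufio"
		"encoding/json"
		"fmt"
		"net/http"
		"strings"
	)

	// chunk models only the fields used by chat.js above.
	type chunk struct {
		Choices []struct {
			Delta struct {
				Content string `json:"content"`
			} `json:"delta"`
		} `json:"choices"`
	}

	func main() {
		payload := `{"model":"my-model","stream":true,"messages":[{"role":"user","content":"Hello"}]}`
		req, _ := http.NewRequest("POST", "http://localhost:8080/v1/chat/completions", strings.NewReader(payload))
		req.Header.Set("Content-Type", "application/json")
		req.Header.Set("Authorization", "Bearer my-key") // illustrative key
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			panic(err)
		}
		defer resp.Body.Close()

		sc := bufio.NewScanner(resp.Body)
		for sc.Scan() {
			line := sc.Text()
			if line == "data: [DONE]" {
				break // end-of-stream sentinel, as in chat.js
			}
			if !strings.HasPrefix(line, "data: ") {
				continue // skip blank lines and ":" keep-alive comments
			}
			var c chunk
			if err := json.Unmarshal([]byte(line[6:]), &c); err != nil {
				continue
			}
			if len(c.Choices) > 0 {
				fmt.Print(c.Choices[0].Delta.Content)
			}
		}
	}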
core/http/static/general.css (new file, 93 lines)
@@ -0,0 +1,93 @@
body {
  font-family: 'Inter', sans-serif;
}
.chat-container { height: 90vh; display: flex; flex-direction: column; }
.chat-messages { overflow-y: auto; flex-grow: 1; }
.htmx-indicator{
  opacity:0;
  transition: opacity 10ms ease-in;
}
.htmx-request .htmx-indicator{
  opacity:1
}
/* Loader (https://cssloaders.github.io/) */
.loader {
  width: 12px;
  height: 12px;
  border-radius: 50%;
  display: block;
  margin:15px auto;
  position: relative;
  color: #FFF;
  box-sizing: border-box;
  animation: animloader 2s linear infinite;
}

@keyframes animloader {
  0% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; }
  25% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 2px; }
  50% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 2px, -38px 0 0 -2px; }
  75% { box-shadow: 14px 0 0 2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; }
  100% { box-shadow: 14px 0 0 -2px, 38px 0 0 2px, -14px 0 0 -2px, -38px 0 0 -2px; }
}
.progress {
  height: 20px;
  margin-bottom: 20px;
  overflow: hidden;
  background-color: #f5f5f5;
  border-radius: 4px;
  box-shadow: inset 0 1px 2px rgba(0,0,0,.1);
}
.progress-bar {
  float: left;
  width: 0%;
  height: 100%;
  font-size: 12px;
  line-height: 20px;
  color: #fff;
  text-align: center;
  background-color: #337ab7;
  -webkit-box-shadow: inset 0 -1px 0 rgba(0,0,0,.15);
  box-shadow: inset 0 -1px 0 rgba(0,0,0,.15);
  -webkit-transition: width .6s ease;
  -o-transition: width .6s ease;
  transition: width .6s ease;
}

.user {
  background-color: #007bff;
}

.assistant {
  background-color: #28a745;
}

.message {
  display: flex;
  align-items: center;
}

.user, .assistant {
  flex-grow: 1;
  margin: 0.5rem;
}

ul {
  list-style-type: disc; /* Adds bullet points */
  padding-left: 1.25rem; /* Indents the list from the left margin */
  margin-top: 1rem; /* Space above the list */
}

li {
  font-size: 0.875rem; /* Small text size */
  color: #4a5568; /* Dark gray text */
  background-color: #f7fafc; /* Very light gray background */
  border-radius: 0.375rem; /* Rounded corners */
  padding: 0.5rem; /* Padding inside each list item */
  box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); /* Subtle shadow */
  margin-bottom: 0.5rem; /* Vertical space between list items */
}

li:last-child {
  margin-bottom: 0; /* Removes bottom margin from the last item */
}
core/http/static/image.js (new file, 96 lines)
@@ -0,0 +1,96 @@
/*

https://github.com/david-haerer/chatapi

MIT License

Copyright (c) 2023 David Härer
Copyright (c) 2024 Ettore Di Giacinto

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

*/
function submitKey(event) {
  event.preventDefault();
  localStorage.setItem("key", document.getElementById("apiKey").value);
  document.getElementById("apiKey").blur();
}


function genImage(event) {
  event.preventDefault();
  const input = document.getElementById("input").value;
  const key = localStorage.getItem("key");

  promptDallE(key, input);

}

async function promptDallE(key, input) {
  document.getElementById("loader").style.display = "block";
  document.getElementById("input").value = "";
  document.getElementById("input").disabled = true;

  const model = document.getElementById("image-model").value;
  const response = await fetch("/v1/images/generations", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${key}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: model,
      steps: 10,
      prompt: input,
      n: 1,
      size: "512x512",
    }),
  });
  const json = await response.json();
  if (json.error) {
    // Display error if there is one
    var div = document.getElementById('result'); // Get the div by its ID
    div.innerHTML = '<p style="color:red;">' + json.error.message + '</p>';
    return;
  }
  const url = json.data[0].url;

  var div = document.getElementById('result'); // Get the div by its ID
  var img = document.createElement('img'); // Create a new img element
  img.src = url; // Set the source of the image
  img.alt = 'Generated image'; // Set the alt text of the image

  div.innerHTML = ''; // Clear the existing content of the div
  div.appendChild(img); // Add the new img element to the div

  document.getElementById("loader").style.display = "none";
  document.getElementById("input").disabled = false;
  document.getElementById("input").focus();
}

document.getElementById("key").addEventListener("submit", submitKey);
document.getElementById("input").focus();
document.getElementById("genimage").addEventListener("submit", genImage);
document.getElementById("loader").style.display = "none";

const storeKey = localStorage.getItem("key");
if (storeKey) {
  document.getElementById("apiKey").value = storeKey;
}

core/http/static/tts.js (new file, 64 lines)
@@ -0,0 +1,64 @@
function submitKey(event) {
  event.preventDefault();
  localStorage.setItem("key", document.getElementById("apiKey").value);
  document.getElementById("apiKey").blur();
}


function genAudio(event) {
  event.preventDefault();
  const input = document.getElementById("input").value;
  const key = localStorage.getItem("key");

  tts(key, input);
}

async function tts(key, input) {
  document.getElementById("loader").style.display = "block";
  document.getElementById("input").value = "";
  document.getElementById("input").disabled = true;

  const model = document.getElementById("tts-model").value;
  const response = await fetch("/tts", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${key}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: model,
      input: input,
    }),
  });
  if (!response.ok) {
    const jsonData = await response.json(); // Now safely parse JSON
    var div = document.getElementById('result');
    div.innerHTML = '<p style="color:red;">Error: ' + jsonData.error.message + '</p>';
    return;
  }

  var div = document.getElementById('result'); // Get the div by its ID
  var link = document.createElement('a');
  link.className = "m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong";
  link.innerHTML = "<i class='fa-solid fa-download'></i> Download result";
  const blob = await response.blob();
  link.href = window.URL.createObjectURL(blob);

  div.innerHTML = ''; // Clear the existing content of the div
  div.appendChild(link); // Add the new download link to the div
  console.log(link)
  document.getElementById("loader").style.display = "none";
  document.getElementById("input").disabled = false;
  document.getElementById("input").focus();
}

document.getElementById("key").addEventListener("submit", submitKey);
document.getElementById("input").focus();
document.getElementById("tts").addEventListener("submit", genAudio);
document.getElementById("loader").style.display = "none";

const storeKey = localStorage.getItem("key");
if (storeKey) {
  document.getElementById("apiKey").value = storeKey;
}

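The same /tts endpoint is easy to drive outside the browser; on success the response body is the raw audio blob, just as the JS above turns it into a download link. A hedged Go sketch (host, key, model, and output filename are illustrative):

	package main

	import (
		"io"
		"net/http"
		"os"
		"strings"
	)

	func main() {
		payload := `{"model":"my-tts-model","input":"Hello from LocalAI"}`
		req, _ := http.NewRequest("POST", "http://localhost:8080/tts", strings.NewReader(payload))
		req.Header.Set("Content-Type", "application/json")
		req.Header.Set("Authorization", "Bearer my-key") // illustrative
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			panic(err)
		}
		defer resp.Body.Close()

		out, err := os.Create("out.wav") // actual format depends on the backend
		if err != nil {
			panic(err)
		}
		defer out.Close()
		io.Copy(out, resp.Body) // save the audio blob
	}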
core/http/views/chat.html (new file, 202 lines)
@@ -0,0 +1,202 @@
<!--
|
||||
|
||||
Part of this page is based on the OpenAI Chatbot example by David Härer:
|
||||
https://github.com/david-haerer/chatapi
|
||||
|
||||
MIT License Copyright (c) 2023 David Härer
|
||||
Copyright (c) 2024 Ettore Di Giacinto
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
-->
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
{{template "views/partials/head" .}}
|
||||
<script defer src="/static/chat.js"></script>
|
||||
<style>
|
||||
body {
|
||||
overflow: hidden;
|
||||
}
|
||||
</style>
|
||||
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
|
||||
<div class="flex flex-col min-h-screen">
|
||||
|
||||
{{template "views/partials/navbar"}}
|
||||
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg" >
|
||||
<!-- Chat Header -->
|
||||
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">
|
||||
|
||||
<div class="flex items-center justify-between">
|
||||
|
||||
<h1 class="text-lg font-semibold"> <i class="fa-solid fa-comments"></i> Chat with {{.Model}} <a href="https://localai.io/features/text-generation/" target="_blank" >
|
||||
<i class="fas fa-circle-info pr-2"></i>
|
||||
</a></h1>
|
||||
<div x-show="component === 'menu'" id="menu">
|
||||
<button
|
||||
@click="$store.chat.clear()"
|
||||
id="clear"
|
||||
title="Clear chat history"
|
||||
|
||||
data-twe-ripple-init
|
||||
data-twe-ripple-color="light"
|
||||
class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
|
||||
>
|
||||
Clear chat 🔥
|
||||
</button>
|
||||
<button @click="component = 'key'" title="Update API key"
|
||||
class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
|
||||
>Set API Key🔑</button>
|
||||
</div>
|
||||
<form x-show="component === 'key'" id="key">
|
||||
<input
|
||||
type="password"
|
||||
id="apiKey"
|
||||
name="apiKey"
|
||||
placeholder="OpenAI API Key"
|
||||
x-model.lazy="key"
|
||||
/>
|
||||
<button @click="component = 'menu'" type="submit" title="Save API key">
|
||||
🔒
|
||||
</button>
|
||||
</form>
|
||||
|
||||
<select x-data="{ link : '' }" x-model="link" x-init="$watch('link', value => window.location = link)"
|
||||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
|
||||
>
|
||||
<!-- Options -->
|
||||
<option value="" disabled class="text-gray-400" >Select a model</option>
|
||||
{{ $model:=.Model}}
|
||||
{{ range .ModelsConfig }}
|
||||
{{ if eq .Name $model }}
|
||||
<option value="/chat/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
|
||||
{{ else }}
|
||||
<option value="/chat/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
</select>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="chat-messages p-4" id="chat" x-data="{history: $store.chat.history}">
|
||||
<p id="usage" x-show="history.length === 0">
|
||||
Start chatting with the AI by typing a prompt in the input field below.
|
||||
</p>
|
||||
<div id="messages">
|
||||
<template x-for="message in history">
|
||||
<div class="message flex items-start space-x-2 my-2" >
|
||||
<!--<img :src="message.role === 'user' ? '/path/to/user-icon.png' : '/path/to/bot-icon.png'" alt="" class="h-6 w-6">-->
|
||||
<i class="fa-solid h-8 w-8" :class="message.role === 'user' ? 'fa-user' : 'fa-robot'" ></i>
|
||||
<div class="flex flex-col flex-1">
|
||||
<span class="text-xs font-semibold text-gray-600" x-text="message.role === 'user' ? 'User' : 'Assistant ({{.Model}})'"></span>
|
||||
<template x-if="message.role === 'user'">
|
||||
<div class="p-2 flex-1 rounded" :class="message.role" x-html="message.html"></div>
|
||||
</template>
|
||||
<template x-if="message.role === 'assistant'">
|
||||
<div class="p-2 flex-1 rounded" :class="message.role" x-html="message.html"></div>
|
||||
</template>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="p-4 border-t border-gray-700" x-data="{ inputValue: '', shiftPressed: false }">
|
||||
<div id="loader" class="my-2 loader" style="display: none;"></div>
|
||||
<input id="chat-model" type="hidden" value="{{.Model}}">
|
||||
<form id="prompt" action="/chat/{{.Model}}" method="get" @submit.prevent="submitPrompt">
|
||||
<div class="relative w-full">
|
||||
<textarea
|
||||
id="input"
|
||||
name="input"
|
||||
x-model="inputValue"
|
||||
placeholder="Send a message..."
|
||||
class="p-2 pl-2 border rounded w-full bg-gray-600 text-white placeholder-gray-300"
|
||||
required
|
||||
@keydown.shift="shiftPressed = true"
|
||||
@keyup.shift="shiftPressed = false"
|
||||
@keydown.enter="if (!shiftPressed) { submitPrompt($event); }"
|
||||
style="padding-right: 4rem;"
|
||||
></textarea>
|
||||
<button type=submit><i class="fa-solid fa-circle-up text-gray-300 absolute right-2 top-3 text-lg p-2 ml-2"></i></button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
<script>
|
||||
document.addEventListener("alpine:init", () => {
|
||||
Alpine.store("chat", {
|
||||
history: [],
|
||||
languages: [undefined],
|
||||
clear() {
|
||||
this.history.length = 0;
|
||||
},
|
||||
add(role, content) {
|
||||
const N = this.history.length - 1;
|
||||
if (this.history.length && this.history[N].role === role) {
|
||||
this.history[N].content += content;
|
||||
str = this.history[N].content;
|
||||
this.history[N].html = DOMPurify.sanitize(
|
||||
marked.parse(this.history[N].content),
|
||||
);
|
||||
} else {
|
||||
c = ""
|
||||
// split content newlines in content
|
||||
const lines = content.split("\n");
|
||||
// for each line, do DOMPurify.sanitize(marked.parse(line)) and add it to c
|
||||
lines.forEach((line) => {
|
||||
c += DOMPurify.sanitize(marked.parse(line));
|
||||
});
|
||||
|
||||
this.history.push({
|
||||
role: role,
|
||||
content: content,
|
||||
html: c,
|
||||
});
|
||||
}
|
||||
|
||||
const parser = new DOMParser();
|
||||
const html = parser.parseFromString(
|
||||
this.history[this.history.length - 1].html,
|
||||
"text/html",
|
||||
);
|
||||
const code = html.querySelectorAll("pre code");
|
||||
if (!code.length) return;
|
||||
code.forEach((el) => {
|
||||
const language = el.className.split("language-")[1];
|
||||
if (this.languages.includes(language)) return;
|
||||
const script = document.createElement("script");
|
||||
script.src = `https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/languages/${language}.min.js`;
|
||||
document.head.appendChild(script);
|
||||
this.languages.push(language);
|
||||
});
|
||||
},
|
||||
messages() {
|
||||
return this.history.map((message) => {
|
||||
return {
|
||||
role: message.role,
|
||||
content: message.content,
|
||||
};
|
||||
});
|
||||
},
|
||||
});
|
||||
});
|
||||
</script>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
@@ -2,6 +2,28 @@
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{.Title}}</title>
<link
  rel="stylesheet"
  href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css"
/>
<script
  defer
  src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"
></script>
<script
  defer
  src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
></script>
<script
  defer
  src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"
></script>
<script
  defer
  src="https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"
></script>

<link href="/static/general.css" rel="stylesheet" />
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
<link
  href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap"
@@ -27,52 +49,4 @@
</script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.1.1/css/all.min.css">
<script src="https://unpkg.com/htmx.org@1.9.12" integrity="sha384-ujb1lZYygJmzgSwoxRggbCHcjc0rB2XoQrxeTUQyRjrOnlCoYta87iKBWq3EsdM2" crossorigin="anonymous"></script>
<style>
  body {
    font-family: 'Inter', sans-serif;
  }
  /* Loader (https://cssloaders.github.io/) */
  .loader {
    width: 12px;
    height: 12px;
    border-radius: 50%;
    display: block;
    margin:15px auto;
    position: relative;
    color: #FFF;
    box-sizing: border-box;
    animation: animloader 2s linear infinite;
  }

  @keyframes animloader {
    0% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; }
    25% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 2px; }
    50% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 2px, -38px 0 0 -2px; }
    75% { box-shadow: 14px 0 0 2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; }
    100% { box-shadow: 14px 0 0 -2px, 38px 0 0 2px, -14px 0 0 -2px, -38px 0 0 -2px; }
  }
  .progress {
    height: 20px;
    margin-bottom: 20px;
    overflow: hidden;
    background-color: #f5f5f5;
    border-radius: 4px;
    box-shadow: inset 0 1px 2px rgba(0,0,0,.1);
  }
  .progress-bar {
    float: left;
    width: 0%;
    height: 100%;
    font-size: 12px;
    line-height: 20px;
    color: #fff;
    text-align: center;
    background-color: #337ab7;
    -webkit-box-shadow: inset 0 -1px 0 rgba(0,0,0,.15);
    box-shadow: inset 0 -1px 0 rgba(0,0,0,.15);
    -webkit-transition: width .6s ease;
    -o-transition: width .6s ease;
    transition: width .6s ease;
  }
</style>
</head>
@@ -10,6 +10,9 @@
<a href="/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
<a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
<a href="/browse/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
<a href="/chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
<a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
<a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
<a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
</div>
</div>

89  core/http/views/text2image.html  Normal file
@@ -0,0 +1,89 @@
<!DOCTYPE html>
<html lang="en">
{{template "views/partials/head" .}}
<script defer src="/static/image.js"></script>

<body class="bg-gray-900 text-gray-200">
<div class="flex flex-col min-h-screen">

{{template "views/partials/navbar" .}}
<div class="container mx-auto px-4 flex-grow " x-data="{ component: 'menu' }">

  <div class="mt-12">
    <div class="flex items-center justify-center text-center pb-2">
      <span class="text-3xl font-semibold text-gray-100">
        🖼️ Text to Image
        <a href="https://localai.io/models/" target="_blank" >
          <i class="fas fa-circle-info pr-2"></i>
        </a>
      </span>
    </div>

    <div class="text-center font-semibold text-gray-100">
      <div class="flex items-center justify-between">

        <div x-show="component === 'menu'" id="menu">
          <button @click="component = 'key'" title="Update API key"
            class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
          >Set API Key🔑</button>
        </div>
        <form x-show="component === 'key'" id="key">
          <input
            type="password"
            id="apiKey"
            name="apiKey"
            placeholder="OpenAI API Key"
            x-model.lazy="key"
          />
          <button @click="component = 'menu'" type="submit" title="Save API key">
            🔒
          </button>
        </form>

        <select x-data="{ link : '' }" x-model="link" x-init="$watch('link', value => window.location = link)"
          class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
        >
          <!-- Options -->
          <option value="" disabled class="text-gray-400" >Select a model</option>
          {{ $model:=.Model}}
          {{ range .ModelsConfig }}
          {{ if eq .Name $model }}
          <option value="/text2image/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
          {{ else }}
          <option value="/text2image/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
          {{ end }}
          {{ end }}
        </select>

      </div>
    </div>

    <div class="mt-12">
      <input id="image-model" type="hidden" value="{{.Model}}">
      <form id="genimage" action="/text2image/{{.Model}}" method="get">
        <input
          type="text"
          id="input"
          name="input"
          placeholder="Prompt…"
          autocomplete="off"
          class="p-2 border rounded w-full bg-gray-600 text-white placeholder-gray-300"
          required
        />
      </form>
      <div class="container max-w-screen-lg mx-auto mt-4 pb-10 flex justify-center">
        <div id="loader" class="my-2 loader" ></div>
      </div>
      <div class="container max-w-screen-lg mx-auto mt-4 pb-10 flex justify-center">
        <div id="result" class="mx-auto"></div>
      </div>
    </div>
  </div>
</div>

{{template "views/partials/footer" .}}
</div>
</body>
</html>
86  core/http/views/tts.html  Normal file
@@ -0,0 +1,86 @@
<!DOCTYPE html>
<html lang="en">
{{template "views/partials/head" .}}
<script defer src="/static/tts.js"></script>

<body class="bg-gray-900 text-gray-200">
<div class="flex flex-col min-h-screen">

{{template "views/partials/navbar" .}}
<div class="container mx-auto px-4 flex-grow " x-data="{ component: 'menu' }">
  <div class="mt-12">
    <div class="flex items-center justify-center text-center pb-2">
      <span class="text-3xl font-semibold text-gray-100">
        <i class="fa-solid fa-music"></i> Text to speech/audio
        <a href="https://localai.io/features/text-to-audio/" target="_blank" >
          <i class="fas fa-circle-info pr-2"></i>
        </a>
      </span>
    </div>
    <div class="text-center font-semibold text-gray-100">
      <div class="flex items-center justify-between">

        <div x-show="component === 'menu'" id="menu">
          <button @click="component = 'key'" title="Update API key"
            class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
          >Set API Key🔑</button>
        </div>
        <form x-show="component === 'key'" id="key">
          <input
            type="password"
            id="apiKey"
            name="apiKey"
            placeholder="OpenAI API Key"
            x-model.lazy="key"
          />
          <button @click="component = 'menu'" type="submit" title="Save API key">
            🔒
          </button>
        </form>

        <select x-data="{ link : '' }" x-model="link" x-init="$watch('link', value => window.location = link)"
          class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
        >
          <!-- Options -->
          <option value="" disabled class="text-gray-400" >Select a model</option>
          {{ $model:=.Model}}
          {{ range .ModelsConfig }}
          {{ if eq .Name $model }}
          <option value="/tts/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
          {{ else }}
          <option value="/tts/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
          {{ end }}
          {{ end }}
        </select>

      </div>
    </div>

    <div class="mt-12">
      <input id="tts-model" type="hidden" value="{{.Model}}">
      <form id="tts" action="/tts/{{.Model}}" method="get">
        <input
          type="text"
          id="input"
          name="input"
          placeholder="Prompt…"
          autocomplete="off"
          class="p-2 border rounded w-full bg-gray-600 text-white placeholder-gray-300"
          required
        />
      </form>
      <div class="container max-w-screen-lg mx-auto mt-4 pb-10 flex justify-center">
        <div id="loader" class="my-2 loader" ></div>
      </div>
      <div class="container max-w-screen-lg mx-auto mt-4 pb-10 flex justify-center">
        <div id="result" class="mx-auto"></div>
      </div>
    </div>
  </div>
</div>

{{template "views/partials/footer" .}}
</div>
</body>
</html>
@@ -10,7 +10,7 @@ type Segment struct {
	Tokens []int `json:"tokens"`
}

type Result struct {
type TranscriptionResult struct {
	Segments []Segment `json:"segments"`
	Text     string    `json:"text"`
}
@@ -15,22 +15,22 @@ import (
	gopsutil "github.com/shirou/gopsutil/v3/process"
)

type BackendMonitor struct {
	configLoader *config.BackendConfigLoader
	modelLoader  *model.ModelLoader
	options      *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name.
type BackendMonitorService struct {
	backendConfigLoader *config.BackendConfigLoader
	modelLoader         *model.ModelLoader
	options             *config.ApplicationConfig // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name.
}

func NewBackendMonitor(configLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, appConfig *config.ApplicationConfig) BackendMonitor {
	return BackendMonitor{
		configLoader: configLoader,
		modelLoader:  modelLoader,
		options:      appConfig,
func NewBackendMonitorService(modelLoader *model.ModelLoader, configLoader *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *BackendMonitorService {
	return &BackendMonitorService{
		modelLoader:         modelLoader,
		backendConfigLoader: configLoader,
		options:             appConfig,
	}
}

func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string, error) {
	config, exists := bm.configLoader.GetBackendConfig(modelName)
func (bms BackendMonitorService) getModelLoaderIDFromModelName(modelName string) (string, error) {
	config, exists := bms.backendConfigLoader.GetBackendConfig(modelName)
	var backendId string
	if exists {
		backendId = config.Model
@@ -46,8 +46,8 @@ func (bm BackendMonitor) getModelLoaderIDFromModelName(modelName string) (string
	return backendId, nil
}

func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) {
	config, exists := bm.configLoader.GetBackendConfig(model)
func (bms *BackendMonitorService) SampleLocalBackendProcess(model string) (*schema.BackendMonitorResponse, error) {
	config, exists := bms.backendConfigLoader.GetBackendConfig(model)
	var backend string
	if exists {
		backend = config.Model
@@ -60,7 +60,7 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe
		backend = fmt.Sprintf("%s.bin", backend)
	}

	pid, err := bm.modelLoader.GetGRPCPID(backend)
	pid, err := bms.modelLoader.GetGRPCPID(backend)

	if err != nil {
		log.Error().Err(err).Str("model", model).Msg("failed to find GRPC pid")
@@ -101,12 +101,12 @@ func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*schema.Backe
	}, nil
}

func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse, error) {
	backendId, err := bm.getModelLoaderIDFromModelName(modelName)
func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.StatusResponse, error) {
	backendId, err := bms.getModelLoaderIDFromModelName(modelName)
	if err != nil {
		return nil, err
	}
	modelAddr := bm.modelLoader.CheckIsLoaded(backendId)
	modelAddr := bms.modelLoader.CheckIsLoaded(backendId)
	if modelAddr == "" {
		return nil, fmt.Errorf("backend %s is not currently loaded", backendId)
	}
@@ -114,7 +114,7 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse
	status, rpcErr := modelAddr.GRPC(false, nil).Status(context.TODO())
	if rpcErr != nil {
		log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
		val, slbErr := bm.SampleLocalBackendProcess(backendId)
		val, slbErr := bms.SampleLocalBackendProcess(backendId)
		if slbErr != nil {
			return nil, fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error())
		}
@@ -131,10 +131,10 @@ func (bm BackendMonitor) CheckAndSample(modelName string) (*proto.StatusResponse
	return status, nil
}

func (bm BackendMonitor) ShutdownModel(modelName string) error {
	backendId, err := bm.getModelLoaderIDFromModelName(modelName)
func (bms BackendMonitorService) ShutdownModel(modelName string) error {
	backendId, err := bms.getModelLoaderIDFromModelName(modelName)
	if err != nil {
		return err
	}
	return bm.modelLoader.ShutdownModel(backendId)
	return bms.modelLoader.ShutdownModel(backendId)
}

@@ -4,6 +4,7 @@ import (
	"context"
	"encoding/json"
	"os"
	"path/filepath"
	"strings"
	"sync"

@@ -84,18 +85,47 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
		}

		var err error
		// if the request contains a gallery name, we apply the gallery from the gallery list
		if op.GalleryName != "" {
			if strings.Contains(op.GalleryName, "@") {
				err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)
			} else {
				err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)

		// delete a model
		if op.Delete {
			modelConfig := &config.BackendConfig{}
			// Galleryname is the name of the model in this case
			dat, err := os.ReadFile(filepath.Join(g.modelPath, op.GalleryName+".yaml"))
			if err != nil {
				updateError(err)
				continue
			}
		} else if op.ConfigURL != "" {
			startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL)
			err = cl.Preload(g.modelPath)
			err = yaml.Unmarshal(dat, modelConfig)
			if err != nil {
				updateError(err)
				continue
			}

			files := []string{}
			// Remove the model from the config
			if modelConfig.Model != "" {
				files = append(files, modelConfig.ModelFileName())
			}

			if modelConfig.MMProj != "" {
				files = append(files, modelConfig.MMProjFileName())
			}

			err = gallery.DeleteModelFromSystem(g.modelPath, op.GalleryName, files)
		} else {
			err = prepareModel(g.modelPath, op.Req, cl, progressCallback)
			// if the request contains a gallery name, we apply the gallery from the gallery list
			if op.GalleryName != "" {
				if strings.Contains(op.GalleryName, "@") {
					err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)
				} else {
					err = gallery.InstallModelFromGalleryByName(op.Galleries, op.GalleryName, g.modelPath, op.Req, progressCallback)
				}
			} else if op.ConfigURL != "" {
				startup.PreloadModelsConfigurations(op.ConfigURL, g.modelPath, op.ConfigURL)
				err = cl.Preload(g.modelPath)
			} else {
				err = prepareModel(g.modelPath, op.Req, cl, progressCallback)
			}
		}

		if err != nil {
@@ -116,7 +146,12 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
			continue
		}

		g.UpdateStatus(op.Id, &gallery.GalleryOpStatus{Processed: true, Message: "completed", Progress: 100})
		g.UpdateStatus(op.Id,
			&gallery.GalleryOpStatus{
				Deletion:  op.Delete,
				Processed: true,
				Message:   "completed",
				Progress:  100})
		}
	}
}()

72  core/services/list_models.go  Normal file
@@ -0,0 +1,72 @@
package services

import (
	"regexp"

	"github.com/go-skynet/LocalAI/core/config"
	"github.com/go-skynet/LocalAI/core/schema"
	"github.com/go-skynet/LocalAI/pkg/model"
)

type ListModelsService struct {
	bcl       *config.BackendConfigLoader
	ml        *model.ModelLoader
	appConfig *config.ApplicationConfig
}

func NewListModelsService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *ListModelsService {
	return &ListModelsService{
		bcl:       bcl,
		ml:        ml,
		appConfig: appConfig,
	}
}

func (lms *ListModelsService) ListModels(filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) {

	models, err := lms.ml.ListModels()
	if err != nil {
		return nil, err
	}

	var mm map[string]interface{} = map[string]interface{}{}

	dataModels := []schema.OpenAIModel{}

	var filterFn func(name string) bool

	// If filter is not specified, do not filter the list by model name
	if filter == "" {
		filterFn = func(_ string) bool { return true }
	} else {
		// If filter _IS_ specified, we compile it to a regex which is used to create the filterFn
		rxp, err := regexp.Compile(filter)
		if err != nil {
			return nil, err
		}
		filterFn = func(name string) bool {
			return rxp.MatchString(name)
		}
	}

	// Start with the known configurations
	for _, c := range lms.bcl.GetAllBackendConfigs() {
		if excludeConfigured {
			mm[c.Model] = nil
		}

		if filterFn(c.Name) {
			dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"})
		}
	}

	// Then iterate through the loose files:
	for _, m := range models {
		// And only adds them if they shouldn't be skipped.
		if _, exists := mm[m]; !exists && filterFn(m) {
			dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
		}
	}

	return dataModels, nil
}
@@ -21,7 +21,6 @@ type configFileHandler struct {

	watcher *fsnotify.Watcher

	configDir string
	appConfig *config.ApplicationConfig
}

@@ -30,11 +29,16 @@ type configFileHandler struct {
func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
	c := configFileHandler{
		handlers:  make(map[string]fileHandler),
		configDir: appConfig.DynamicConfigsDir,
		appConfig: appConfig,
	}
	c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
	c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
	err := c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
	if err != nil {
		log.Error().Err(err).Str("file", "api_keys.json").Msg("unable to register config file handler")
	}
	err = c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
	if err != nil {
		log.Error().Err(err).Str("file", "external_backends.json").Msg("unable to register config file handler")
	}
	return c
}

@@ -45,16 +49,17 @@ func (c *configFileHandler) Register(filename string, handler fileHandler, runNo
	}
	c.handlers[filename] = handler
	if runNow {
		c.callHandler(path.Join(c.appConfig.DynamicConfigsDir, filename), handler)
		c.callHandler(filename, handler)
	}
	return nil
}

func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
	log.Trace().Str("filename", filename).Msg("reading file for dynamic config update")
	fileContent, err := os.ReadFile(filename)
	rootedFilePath := filepath.Join(c.appConfig.DynamicConfigsDir, filepath.Clean(filename))
	log.Trace().Str("filename", rootedFilePath).Msg("reading file for dynamic config update")
	fileContent, err := os.ReadFile(rootedFilePath)
	if err != nil && !os.IsNotExist(err) {
		log.Error().Err(err).Str("filename", filename).Msg("could not read file")
		log.Error().Err(err).Str("filename", rootedFilePath).Msg("could not read file")
	}

	if err = handler(fileContent, c.appConfig); err != nil {
@@ -66,7 +71,8 @@ func (c *configFileHandler) Watch() error {
	configWatcher, err := fsnotify.NewWatcher()
	c.watcher = configWatcher
	if err != nil {
		log.Fatal().Err(err).Str("configdir", c.configDir).Msg("wnable to create a watcher for configuration directory")
		log.Fatal().Err(err).Str("configdir", c.appConfig.DynamicConfigsDir).Msg("unable to create a watcher for configuration directory")

	}

	if c.appConfig.DynamicConfigsDirPollInterval > 0 {
@@ -77,7 +83,7 @@ func (c *configFileHandler) Watch() error {
			<-ticker.C
			for file, handler := range c.handlers {
				log.Debug().Str("file", file).Msg("polling config file")
				c.callHandler(filepath.Join(c.appConfig.DynamicConfigsDir, file), handler)
				c.callHandler(file, handler)
			}
		}
	}()
@@ -97,7 +103,7 @@ func (c *configFileHandler) Watch() error {
				continue
			}

			c.callHandler(event.Name, handler)
			c.callHandler(filepath.Base(event.Name), handler)
		}
	case err, ok := <-c.watcher.Errors:
		log.Error().Err(err).Msg("config watcher error received")
@@ -118,8 +124,8 @@ func (c *configFileHandler) Watch() error {
}

// TODO: When we institute graceful shutdown, this should be called
func (c *configFileHandler) Stop() {
	c.watcher.Close()
func (c *configFileHandler) Stop() error {
	return c.watcher.Close()
}

func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {

@@ -4,6 +4,7 @@ import (
	"fmt"
	"os"

	"github.com/go-skynet/LocalAI/core"
	"github.com/go-skynet/LocalAI/core/config"
	"github.com/go-skynet/LocalAI/core/services"
	"github.com/go-skynet/LocalAI/internal"
@@ -100,7 +101,10 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
	go func() {
		<-options.Context.Done()
		log.Debug().Msgf("Context canceled, shutting down")
		ml.StopAllGRPC()
		err := ml.StopAllGRPC()
		if err != nil {
			log.Error().Err(err).Msg("error while stopping all grpc backends")
		}
	}()

	if options.WatchDog {
@@ -130,3 +134,33 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
	log.Info().Msg("core/startup process completed!")
	return cl, ml, options, nil
}

// In Lieu of a proper DI framework, this function wires up the Application manually.
// This is in core/startup rather than core/state.go to keep package references clean!
func createApplication(appConfig *config.ApplicationConfig) *core.Application {
	app := &core.Application{
		ApplicationConfig:   appConfig,
		BackendConfigLoader: config.NewBackendConfigLoader(),
		ModelLoader:         model.NewModelLoader(appConfig.ModelPath),
	}

	var err error

	// app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
	// app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
	// app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
	// app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
	// app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)

	app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
	app.GalleryService = services.NewGalleryService(app.ApplicationConfig.ModelPath)
	app.ListModelsService = services.NewListModelsService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
	// app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService)

	app.LocalAIMetricsService, err = services.NewLocalAIMetricsService()
	if err != nil {
		log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.")
	}

	return app
}

@@ -2,7 +2,7 @@
weight: 20
title: "Advanced"
description: "Advanced usage"
icon: science
icon: settings
lead: ""
date: 2020-10-06T08:49:15+00:00
lastmod: 2020-10-06T08:49:15+00:00

@@ -498,4 +498,28 @@ When using the `-core` container image it is possible to prepare the python back

```bash
docker run --env EXTRA_BACKENDS="backend/python/diffusers" quay.io/go-skynet/local-ai:master-ffmpeg-core
```

### Concurrent requests

LocalAI supports parallel requests for the backends that support it. For instance, vLLM and llama.cpp support parallel requests, so LocalAI can serve multiple requests in parallel.

To enable parallel requests, pass `--parallel-requests` or set the `LOCALAI_PARALLEL_REQUESTS` environment variable to `true`.

The environment variables that tweak parallelism are the following:

```
### Python backends GRPC max workers
### Default number of workers for GRPC Python backends.
### This actually controls whether a backend can process multiple requests or not.
# PYTHON_GRPC_MAX_WORKERS=1

### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
# LLAMACPP_PARALLEL=1

### Enable to run parallel requests
# LOCALAI_PARALLEL_REQUESTS=true
```

Note that, for llama.cpp, you need to set `LLAMACPP_PARALLEL` to the number of parallel processes your GPU/CPU can handle. For python-based backends (like vLLM) you can set `PYTHON_GRPC_MAX_WORKERS` to the number of parallel requests.

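As a rough sketch, a combined invocation might look like the following (the image tag reuses the `-core` example above; the worker counts are illustrative assumptions, not recommendations):

```bash
# illustrative values only; tune the worker counts to your hardware
docker run -p 8080:8080 \
  --env LOCALAI_PARALLEL_REQUESTS=true \
  --env LLAMACPP_PARALLEL=4 \
  --env PYTHON_GRPC_MAX_WORKERS=4 \
  quay.io/go-skynet/local-ai:master-ffmpeg-core
```
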
@@ -7,15 +7,10 @@ weight = 18
url = '/models'
+++

<h1 align="center">
  <br>
  <img height="300" src="https://github.com/go-skynet/model-gallery/assets/2420543/7a6a8183-6d0a-4dc4-8e1d-f2672fab354e"> <br>
  <br>
</h1>
The model gallery is a curated collection of model configurations for [LocalAI](https://github.com/go-skynet/LocalAI) that enables one-click install of models directly from the LocalAI Web interface.

The model gallery is a (experimental!) collection of model configurations for [LocalAI](https://github.com/go-skynet/LocalAI).
To ease out model installations, LocalAI provides a way to preload models on start and to download and install them at runtime. You can install models manually by copying them over the `models` directory, or use the API or the Web interface to configure, download and verify the model assets for you.

To ease out model installations, LocalAI provides a way to preload models on start and to download and install them at runtime. You can install models manually by copying them over the `models` directory, or use the API to configure, download and verify the model assets for you. As the UI is still a work in progress, you will find here the documentation about the API Endpoints.

{{% alert note %}}
The models in this gallery are not directly maintained by LocalAI. If you find a model that is not working, please open an issue on the model gallery repository.
@@ -25,58 +20,55 @@ The models in this gallery are not directly maintained by LocalAI. If you find a
GPT and text generation models might have a license which is not permissive for commercial use or might be questionable or without any license at all. Please check the model license before using it. The official gallery contains only open licensed models.
{{% /alert %}}

|
||||
|
||||
## Useful Links and resources
|
||||
|
||||
- [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) - here you can find a list of the most performing models on the Open LLM benchmark. Keep in mind models compatible with LocalAI must be quantized in the `gguf` format.
|
||||
|
||||
## How it works
|
||||
|
||||
## Model repositories
|
||||
Navigate the WebUI interface in the "Models" section from the navbar at the top. Here you can find a list of models that can be installed, and you can install them by clicking the "Install" button.
|
||||
|
||||
## Add other galleries
|
||||
|
||||
You can add other galleries by setting the `GALLERIES` environment variable. The `GALLERIES` environment variable is a list of JSON objects, where each object has a `name` and a `url` field. The `name` field is the name of the gallery, and the `url` field is the URL of the gallery's index file, for example:
|
||||
|
||||
```json
|
||||
GALLERIES=[{"name":"<GALLERY_NAME>", "url":"<GALLERY_URL"}]
|
||||
```
|
||||
|
||||
The models in the gallery will be automatically indexed and available for installation.
|
||||
|
||||
## API Reference
|
||||
|
||||
### Model repositories
|
||||
|
||||
You can install a model in runtime, while the API is running and it is started already, or before starting the API by preloading the models.
|
||||
|
||||
To install a model in runtime you will need to use the `/models/apply` LocalAI API endpoint.
|
||||
|
||||
To enable the `model-gallery` repository you need to start `local-ai` with the `GALLERIES` environment variable:
|
||||
By default LocalAI is configured with the `localai` repository.
|
||||
|
||||
To use additional repositories you need to start `local-ai` with the `GALLERIES` environment variable:
|
||||
|
||||
```
|
||||
GALLERIES=[{"name":"<GALLERY_NAME>", "url":"<GALLERY_URL"}]
|
||||
```
|
||||
|
||||
For example, to enable the `model-gallery` repository, start `local-ai` with:
|
||||
For example, to enable the default `localai` repository, you can start `local-ai` with:
|
||||
|
||||
```
|
||||
GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
|
||||
GALLERIES=[{"name":"localai", "url":"github:mudler/localai/gallery/index.yaml"}]
|
||||
```
|
||||
|
||||
where `github:go-skynet/model-gallery/index.yaml` will be expanded automatically to `https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml`.
|
||||
where `github:mudler/localai/gallery/index.yaml` will be expanded automatically to `https://raw.githubusercontent.com/mudler/LocalAI/main/index.yaml`.
|
||||
|
||||
Note: the url are expanded automatically for `github` and `huggingface`, however `https://` and `http://` prefix works as well.
|
||||
|
||||
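As a sketch, the equivalent raw-URL form (mirroring the expansion described above) would be:

```
GALLERIES=[{"name":"localai", "url":"https://raw.githubusercontent.com/mudler/LocalAI/main/index.yaml"}]
```
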
{{% alert note %}}

As this feature is experimental, you need to run `local-ai` with a list of `GALLERIES`. Currently there are two galleries:

- An official one, containing only definitions and models with a clear LICENSE to avoid any DMCA infringement. As I'm not sure what's the best action to do in this case, I'm not going to include any model that is not clearly licensed in this repository, which is officially linked to LocalAI.
- A "community" one that contains an index of `huggingface` models that are compatible with the `ggml` format and lives in the `localai-huggingface-zoo` repository.

To enable the two repositories, start `LocalAI` with the `GALLERIES` environment variable:

```bash
GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]
```

If running with `docker-compose`, simply edit the `.env` file and uncomment the `GALLERIES` variable, and add the one you want to use.

{{% /alert %}}

{{% alert note %}}
You might not find all the models in this gallery: automated CI updates the gallery periodically. You can find most of the models on Hugging Face (https://huggingface.co/); generally a model is available ~24h after upload.

Under no circumstances are LocalAI and its developers responsible for the models in this gallery: CI just indexes them and provides a convenient way to install them with an automatic configuration and a consistent API. Don't install models from authors you don't trust, and check the appropriate license for your use case. Models are automatically indexed and hosted on Hugging Face (https://huggingface.co/). For any issue with the models, please open an issue on the model gallery repository if it's a LocalAI misconfiguration; otherwise refer to the Hugging Face repository. If you think a model should not be listed, please reach out to us and we will remove it from the gallery.
{{% /alert %}}

{{% alert note %}}

There is no documentation yet on how to build a gallery or a repository - but you can find an example in the [model-gallery](https://github.com/go-skynet/model-gallery) repository.

If you want to build your own gallery, there is no documentation yet. However you can find the source of the default gallery in the [LocalAI repository](https://github.com/mudler/LocalAI/tree/master/gallery).
{{% /alert %}}


@@ -110,34 +102,16 @@ To install a model from the gallery repository, you can pass the model name in t

```bash
LOCALAI=http://localhost:8080
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
     "id": "model-gallery@bert-embeddings"
     "id": "localai@bert-embeddings"
   }'
```

where:
- `model-gallery` is the repository. It is optional and can be omitted. If the repository is omitted LocalAI will search the model by name in all the repositories. If the same model name is present in multiple galleries, the first match wins.
- `localai` is the repository. It is optional and can be omitted. If the repository is omitted LocalAI will search the model by name in all the repositories. If the same model name is present in multiple galleries, the first match wins.
- `bert-embeddings` is the model name in the gallery
  (read its [config here](https://github.com/go-skynet/model-gallery/blob/main/bert-embeddings.yaml)).
  (read its [config here](https://github.com/mudler/LocalAI/tree/master/gallery/blob/main/bert-embeddings.yaml)).

{{% alert note %}}
If the `huggingface` model gallery is enabled (it's enabled by default),
and the model has an entry in the model gallery's associated YAML config
(for `huggingface`, see [`model-gallery/huggingface.yaml`](https://github.com/go-skynet/model-gallery/blob/main/huggingface.yaml)),
you can install models by specifying directly the model's `id`.
For example, to install wizardlm superhot:

```bash
LOCALAI=http://localhost:8080
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
     "id": "huggingface@TheBloke/WizardLM-13B-V1-0-Uncensored-SuperHOT-8K-GGML/wizardlm-13b-v1.0-superhot-8k.ggmlv3.q4_K_M.bin"
   }'
```

Note that the `id` can be used similarly when pre-loading models at start; see the sketch below.
{{% /alert %}}
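As a sketch of pre-loading by `id` (reusing the gallery id from the example above):

```bash
PRELOAD_MODELS=[{"id": "localai@bert-embeddings"}]
```
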


## How to install a model (without a gallery)
### How to install a model not part of a gallery

If you don't want to set any gallery repository, you can still install models by loading a model configuration file.

@@ -201,13 +175,13 @@ Note: `url` or `id` must be specified. `url` is used to a url to a model gallery
For example:

```bash
PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/stablediffusion.yaml"}]
PRELOAD_MODELS=[{"url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master"}]
```

or as arg:

```bash
local-ai --preload-models '[{"url": "github:go-skynet/model-gallery/stablediffusion.yaml"}]'
local-ai --preload-models '[{"url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master"}]'
```

or in a YAML file:
@@ -218,14 +192,14 @@ local-ai --preload-models-config "/path/to/yaml"

YAML:
```yaml
- url: github:go-skynet/model-gallery/stablediffusion.yaml
- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master
```

</details>

{{% alert note %}}

You can already find some open licensed models in the [model gallery](https://github.com/go-skynet/model-gallery).
You can already find some open licensed models in the [LocalAI gallery](https://github.com/mudler/LocalAI/tree/master/gallery).

If you don't find the model in the gallery you can try to use the "base" model and provide a URL to LocalAI:

@@ -233,7 +207,7 @@ If you don't find the model in the gallery you can try to use the "base" model a

```
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
     "url": "github:go-skynet/model-gallery/base.yaml",
     "url": "github:mudler/LocalAI/gallery/base.yaml@master",
     "name": "model-name",
     "files": [
        {
@@ -249,7 +223,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{

{{% /alert %}}

## Installing a model with a different name
### Override a model name

To install a model with a different name, specify a `name` parameter in the request body.

@@ -266,11 +240,11 @@ For example, to install a model as `gpt-3.5-turbo`:
```bash
LOCALAI=http://localhost:8080
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
     "url": "github:go-skynet/model-gallery/gpt4all-j.yaml",
     "url": "github:mudler/LocalAI/gallery/gpt4all-j.yaml",
     "name": "gpt-3.5-turbo"
   }'
```
## Additional Files
### Additional Files

<details>

@@ -293,7 +267,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{

</details>

## Overriding configuration files
### Overriding configuration files

<details>

@@ -324,7 +298,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{

```bash
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
     "url": "github:go-skynet/model-gallery/bert-embeddings.yaml",
     "url": "github:mudler/LocalAI/gallery/bert-embeddings.yaml",
     "name": "text-embedding-ada-002"
   }'
```
@@ -348,10 +322,10 @@ URL: https://github.com/EdVince/Stable-Diffusion-NCNN
{{< tabs >}}
{{% tab name="Prepare the model in runtime" %}}

While the API is running, you can install the model by using the `/models/apply` endpoint, pointing it to the `stablediffusion` model in the [models-gallery](https://github.com/go-skynet/model-gallery#image-generation-stable-diffusion):
While the API is running, you can install the model by using the `/models/apply` endpoint, pointing it to the `stablediffusion` model in the [models-gallery](https://github.com/mudler/LocalAI/tree/master/gallery#image-generation-stable-diffusion):
```bash
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
     "url": "github:go-skynet/model-gallery/stablediffusion.yaml"
     "url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master"
   }'
```

@@ -361,13 +335,13 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
You can set the `PRELOAD_MODELS` environment variable:

```bash
PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/stablediffusion.yaml"}]
PRELOAD_MODELS=[{"url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master"}]
```

or as arg:

```bash
local-ai --preload-models '[{"url": "github:go-skynet/model-gallery/stablediffusion.yaml"}]'
local-ai --preload-models '[{"url": "github:mudler/LocalAI/gallery/stablediffusion.yaml@master"}]'
```

or in a YAML file:
@@ -378,7 +352,7 @@ local-ai --preload-models-config "/path/to/yaml"

YAML:
```yaml
- url: github:go-skynet/model-gallery/stablediffusion.yaml
- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master
```

{{% /tab %}}
@@ -403,7 +377,7 @@ URL: https://github.com/ggerganov/whisper.cpp

```bash
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
     "url": "github:go-skynet/model-gallery/whisper-base.yaml",
     "url": "github:mudler/LocalAI/gallery/whisper-base.yaml@master",
     "name": "whisper-1"
   }'
```
@@ -414,13 +388,13 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
You can set the `PRELOAD_MODELS` environment variable:

```bash
PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/whisper-base.yaml", "name": "whisper-1"}]
PRELOAD_MODELS=[{"url": "github:mudler/LocalAI/gallery/whisper-base.yaml@master", "name": "whisper-1"}]
```

or as arg:

```bash
local-ai --preload-models '[{"url": "github:go-skynet/model-gallery/whisper-base.yaml", "name": "whisper-1"}]'
local-ai --preload-models '[{"url": "github:mudler/LocalAI/gallery/whisper-base.yaml@master", "name": "whisper-1"}]'
```

or in a YAML file:
@@ -431,37 +405,13 @@ local-ai --preload-models-config "/path/to/yaml"

YAML:
```yaml
- url: github:go-skynet/model-gallery/whisper-base.yaml
- url: github:mudler/LocalAI/gallery/whisper-base.yaml@master
  name: whisper-1
```

{{% /tab %}}
{{< /tabs >}}

### GPTs

<details>

```bash
LOCALAI=http://localhost:8080
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
     "url": "github:go-skynet/model-gallery/gpt4all-j.yaml",
     "name": "gpt4all-j"
   }'
```

To test it:

```
curl $LOCALAI/v1/chat/completions -H "Content-Type: application/json" -d '{
     "model": "gpt4all-j",
     "messages": [{"role": "user", "content": "How are you?"}],
     "temperature": 0.1
   }'
```

</details>

### Note

LocalAI will create a batch process that downloads the required files from a model definition and automatically reloads itself to include the new model.
@@ -495,7 +445,7 @@ Returns an `uuid` and an `url` to follow up the state of the process:
{ "uuid":"251475c9-f666-11ed-95e0-9a8a4480ac58", "status":"http://localhost:8080/models/jobs/251475c9-f666-11ed-95e0-9a8a4480ac58"}
```

To see a collection example of curated model definition files, see the [model-gallery](https://github.com/go-skynet/model-gallery).
To see a collection example of curated model definition files, see the [LocalAI repository](https://github.com/mudler/LocalAI/tree/master/gallery).

#### Get model job state `/models/jobs/<uid>`

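As a sketch, this endpoint can be polled with the `uuid` returned above:

```bash
curl http://localhost:8080/models/jobs/251475c9-f666-11ed-95e0-9a8a4480ac58
```
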
57  docs/content/docs/features/reranker.md  Normal file
@@ -0,0 +1,57 @@

+++
disableToc = false
title = "Reranker"
weight = 11
url = "/features/reranker/"
+++

A **reranking** model, often referred to as a cross-encoder, is a core component in the two-stage retrieval systems used in information retrieval and natural language processing tasks.
Given a query and a set of documents, it will output similarity scores.

We can then use the scores to reorder the documents by relevance in our RAG system, to increase its overall accuracy and filter out non-relevant results.

![output](https://github.com/mudler/LocalAI/assets/2420543/ede67b25-fac3-4833-ae4f-78290e401e60)

LocalAI supports reranker models, and you can use them by using the `rerankers` backend, which uses [rerankers](https://github.com/AnswerDotAI/rerankers).

## Usage

You can test `rerankers` by using container images with python (this does **NOT** work with `core` images) and a model config file like this, or by installing `cross-encoder` from the gallery in the UI:

```yaml
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
  model: cross-encoder

# optionally:
# type: flashrank
# diffusers:
#   pipeline_type: en # to specify the english language
```

and test it with:

```bash

curl http://localhost:8080/v1/rerank \
  -H "Content-Type: application/json" \
  -d '{
  "model": "jina-reranker-v1-base-en",
  "query": "Organic skincare products for sensitive skin",
  "documents": [
    "Eco-friendly kitchenware for modern homes",
    "Biodegradable cleaning supplies for eco-conscious consumers",
    "Organic cotton baby clothes for sensitive skin",
    "Natural organic skincare range for sensitive skin",
    "Tech gadgets for smart homes: 2024 edition",
    "Sustainable gardening tools and compost solutions",
    "Sensitive skin-friendly facial cleansers and toners",
    "Organic food wraps and storage solutions",
    "All-natural pet food for dogs with allergies",
    "Yoga mats made from recycled materials"
  ],
  "top_n": 3
}'
```
@@ -163,3 +163,7 @@ curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
     "input":"Hello!"
   }' | aplay
```

## Parler-tts

`parler-tts` is supported: it is possible to install and configure the model directly from the gallery. https://github.com/huggingface/parler-tts
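A minimal sketch of installing it at runtime via the gallery API; the `parler-tts` gallery id here is an assumption, check the Models section of the WebUI for the exact name:

```bash
# the gallery id below is assumed; verify it in the WebUI model list
curl http://localhost:8080/models/apply -H "Content-Type: application/json" -d '{
     "id": "localai@parler-tts"
   }'
```
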
@@ -154,11 +154,11 @@ cd LocalAI
# build the binary
make build

# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# Download phi-2 to models/
wget https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf -O models/phi-2.Q2_K

# Use a template from the examples
cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/phi-2.Q2_K.tmpl

# Run LocalAI
./local-ai --models-path=./models/ --debug=true
@@ -167,17 +167,29 @@ cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
curl http://localhost:8080/v1/models

curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
     "model": "ggml-gpt4all-j",
     "model": "phi-2.Q2_K",
     "messages": [{"role": "user", "content": "How are you?"}],
     "temperature": 0.9
   }'
```

#### Troublshooting mac
#### Troubleshooting mac

If you encounter errors regarding a missing utility metal, install `Xcode` from the App Store.
If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256).
If you get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again.
- If you encounter errors regarding a missing utility metal, install `Xcode` from the App Store.

- After the installation of Xcode, if you receive an xcrun error `'xcrun: error: unable to find utility "metal", not a developer tool or in PATH'`, you might have installed the Xcode command line tools before installing Xcode; the former points to an incomplete SDK.

```
# print /Library/Developer/CommandLineTools, if command line tools were installed in advance
xcode-select --print-path

# point to a complete SDK
sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer
```

- If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256).

- If you get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again.

```
# reinstall build dependencies

@@ -1,13 +1,14 @@

+++
disableToc = false
title = "Available Container images"
weight = 25
title = "Run with container images"
weight = 6
url = '/basics/container/'
ico = "rocket_launch"
+++

LocalAI provides a variety of images to support different environments. These images are available on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) and [Docker Hub](https://hub.docker.com/r/localai/localai).

> _For All-in-One image with a pre-configured set of models and backends, see the [AIO Images]({{%relref "docs/reference/aio-images" %}})._
All-in-One images come with a pre-configured set of models and backends; standard images, instead, do not have any model pre-configured or installed.

For GPU acceleration support for Nvidia video graphics cards, use the Nvidia/CUDA images; if you don't have a GPU, use the CPU images. If you have AMD or Mac Silicon, see the [build section]({{%relref "docs/getting-started/build" %}}).

@@ -22,6 +23,62 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA

{{% /alert %}}

## All-in-one images

All-In-One images come pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. These images are available for both CPU and GPU environments, are designed to be easy to use, and require no configuration. Models configuration can be found [here](https://github.com/mudler/LocalAI/tree/master/aio) separated by size.

In the AIO images there are models configured with the names of OpenAI models; however, they are really backed by open-source models. You can find the mapping in the table below:

| Category | Model name | Real model (CPU) | Real model (GPU) |
| ---- | ---- | ---- | ---- |
| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` |
| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` |
| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` |
| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same |
| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same |
| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` |

### Usage

Select the image (CPU or GPU) and start the container with Docker:

```bash
# CPU example
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
```

LocalAI will automatically download all the required models, and the API will be available at [localhost:8080](http://localhost:8080/v1/models).

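For instance, a request against the `gpt-4` name is served by the mapped open model from the table above (a sketch mirroring the chat example used elsewhere in these docs):

```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
     "model": "gpt-4",
     "messages": [{"role": "user", "content": "How are you?"}]
   }'
```
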
### Available images

| Description | Quay | Docker Hub |
| --- | --- |-----------------------------------------------|
| Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` |
| Versioned image (e.g. for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` |
| Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` |
| Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` |
| Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` |
| Latest images for Intel GPU (sycl f16) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f16` | `localai/localai:latest-aio-gpu-intel-f16` |
| Latest images for Intel GPU (sycl f32) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f32` | `localai/localai:latest-aio-gpu-intel-f32` |

### Available environment variables

The AIO images inherit the same environment variables as the base images and the environment of LocalAI (which you can inspect by calling `--help`). However, they support additional environment variables available only from the container image:

| Variable | Default | Description |
| ---------------------| ------- | ----------- |
| `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` |
| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/run-other-models" %}})) |

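As a sketch, the auto-detected profile can be overridden at startup (image tag taken from the table above):

```bash
docker run -p 8080:8080 -e PROFILE=gpu-8g --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12
```
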
## Standard container images

Standard container images do not have pre-installed models.

Images are available with and without python dependencies. Note that images with python dependencies are bigger (on the order of 17GB).

Images with `core` in the tag are smaller and do not contain any python dependencies.

{{< tabs tabTotal="6" >}}
{{% tab tabName="Vanilla / CPU Images" %}}

@@ -100,4 +157,3 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA
## See Also

- [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}})
- [AIO Images]({{%relref "docs/reference/aio-images" %}})
30
docs/content/docs/getting-started/kubernetes.md
Normal file
30
docs/content/docs/getting-started/kubernetes.md
Normal file
@@ -0,0 +1,30 @@
+++
disableToc = false
title = "Run with Kubernetes"
weight = 6
url = '/basics/kubernetes/'
ico = "rocket_launch"
+++

For installing LocalAI in Kubernetes, you can use the `go-skynet` helm chart:

```bash
# Add the helm repository
helm repo add go-skynet https://go-skynet.github.io/helm-charts/
# Update the repositories
helm repo update
# Get the default values
helm show values go-skynet/local-ai > values.yaml

# Edit the values if needed
# vim values.yaml ...

# Install the helm chart
helm install local-ai go-skynet/local-ai -f values.yaml
```

If you prefer a manifest file, you can apply the example deployment directly and customize it as you like:

```bash
kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI/master/examples/kubernetes/deployment.yaml
```
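
With either method, you can then check that the workload came up. The namespace depends on your chart values or on the manifest, so adjust as needed:

```bash
# Verify that the LocalAI pods are running
kubectl get pods -A | grep local-ai
```
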
@@ -131,22 +131,7 @@ Note: If you are on Windows, please make sure the project is on the Linux Filesy

{{% tab tabName="Kubernetes" %}}

For installing LocalAI in Kubernetes, you can use the following helm chart:

```bash
# Install the helm repository
helm repo add go-skynet https://go-skynet.github.io/helm-charts/
# Update the repositories
helm repo update
# Get the values
helm show values go-skynet/local-ai > values.yaml

# Edit the values if needed
# vim values.yaml ...

# Install the helm chart
helm install local-ai go-skynet/local-ai -f values.yaml
```
See the [Kubernetes section]({{%relref "docs/getting-started/kubernetes" %}}).

{{% /tab %}}
{{% tab tabName="From binary" %}}

@@ -30,7 +30,7 @@ Before you begin, ensure you have a container engine installed if you are not us

> _Do you already have a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}}) or [Run other models]({{%relref "docs/getting-started/run-other-models" %}}) to use an already-configured model_.

LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset.
LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. If you don't need models pre-configured, you can use the standard [images]({{%relref "docs/getting-started/container-images" %}}).

These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration.

@@ -91,7 +91,7 @@ services:
      # capabilities: [gpu]
```

For a list of all the container-images available, see [Container images]({{%relref "docs/reference/container-images" %}}). To learn more about All-in-one images instead, see [All-in-one Images]({{%relref "docs/reference/aio-images" %}}).
For a list of all the container-images available, see [Container images]({{%relref "docs/getting-started/container-images" %}}). To learn more about All-in-one images instead, see [All-in-one Images]({{%relref "docs/getting-started/container-images" %}}).

{{% alert icon="💡" %}}

@@ -114,9 +114,36 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models loca

{{% /alert %}}

## From binary

LocalAI is also available as a standalone binary. Binaries are compiled for Linux and macOS and automatically uploaded to the GitHub releases. Windows is known to work with WSL.

You can check out the releases at https://github.com/mudler/LocalAI/releases.

{{< tabs tabTotal="2" >}}
{{% tab tabName="Linux" %}}
| CPU flagset | Link |
| --- | --- |
| avx2 | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx2-Linux-x86_64) |
| avx512 | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx512-Linux-x86_64) |
| avx | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx-Linux-x86_64) |
{{% /tab %}}
{{% tab tabName="MacOS" %}}
| CPU flagset | Link |
| --- | --- |
| avx2 | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx2-Darwin-arm64) |
| avx512 | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx512-Darwin-arm64) |
| avx | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-avx-Darwin-arm64) |

{{% /tab %}}

{{< /tabs >}}

## Try it out

LocalAI does not ship a webui by default, however you can use 3rd party projects to interact with it (see also [Integrations]({{%relref "docs/integrations" %}})). However, you can test out the API endpoints using `curl`; you can find a few examples below.
Connect to LocalAI: by default the WebUI is accessible at http://localhost:8080 . You can also use 3rd party projects to interact with LocalAI as you would use OpenAI (see also [Integrations]({{%relref "docs/integrations" %}})).

You can also test out the API endpoints using `curl`; a few examples follow.

### Text Generation
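
The documentation's own example is elided by this diff; a minimal request of the kind described here looks like the following (the `gpt-4` name assumes the AIO image's model aliases from the table above):

```bash
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4",
    "messages": [{"role": "user", "content": "Say hello!"}]
  }'
```
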

@@ -300,6 +327,6 @@ Explore further resources and community contributions:

- [Build LocalAI and the container image]({{%relref "docs/getting-started/build" %}})
- [Run models manually]({{%relref "docs/getting-started/manual" %}})
- [Run other models]({{%relref "docs/getting-started/run-other-models" %}})
- [Container images]({{%relref "docs/reference/container-images" %}})
- [All-in-one Images]({{%relref "docs/reference/aio-images" %}})
- [Container images]({{%relref "docs/getting-started/container-images" %}})
- [All-in-one Images]({{%relref "docs/getting-started/container-images" %}})
- [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples)

@@ -2,7 +2,7 @@
disableToc = false
title = "Integrations"
weight = 19
icon = "rocket_launch"
icon = "sync"

+++

@@ -99,8 +99,9 @@ Note that this started just as a fun weekend project by [mudler](https://github.

- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 💾 [Stores](https://localai.io/stores)
- 🆕 [Reranker](https://localai.io/features/reranker/)

## Contribute and help

@@ -2,7 +2,7 @@
weight: 23
title: "References"
description: "Reference"
icon: science
icon: menu_book
lead: ""
date: 2020-10-06T08:49:15+00:00
lastmod: 2020-10-06T08:49:15+00:00

@@ -1,53 +0,0 @@

+++
disableToc = false
title = "All-In-One images"
weight = 26
+++

All-In-One images come pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration. Models configuration can be found [here](https://github.com/mudler/LocalAI/tree/master/aio) separated by size.

In the AIO images, models are configured with the names of OpenAI models; however, they are actually backed by open-source models. See the table below:

| Category | Model name | Real model (CPU) | Real model (GPU) |
| ---- | ---- | ---- | ---- |
| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` |
| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` |
| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` |
| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same |
| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same |
| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` |

## Usage

Select the image (CPU or GPU) and start the container with Docker:

```bash
# CPU example
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
```

LocalAI will automatically download all the required models, and the API will be available at [localhost:8080](http://localhost:8080/v1/models).

## Available images

| Description | Quay | Docker Hub |
| --- | --- |-----------------------------------------------|
| Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` |
| Versioned image (e.g. for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` |
| Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` |
| Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` |
| Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` |
| Latest images for Intel GPU (sycl f16) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f16` | `localai/localai:latest-aio-gpu-intel-f16` |
| Latest images for Intel GPU (sycl f32) | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel-f32` | `localai/localai:latest-aio-gpu-intel-f32` |

## Available environment variables

The AIO images inherit the environment variables of the base images and of LocalAI itself (which you can inspect by calling `--help`). In addition, they support the following variables, available only in the container image:

| Variable | Default | Description |
| ---------------------| ------- | ----------- |
| `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` |
| `MODELS` | Auto-detected | A list of model YAML configuration file URIs/URLs (see also [running models]({{%relref "docs/getting-started/run-other-models" %}})) |

@@ -1,3 +1,3 @@
{
"version": "v2.12.4"
"version": "v2.13.0"
}

gallery/chatml.yaml (new file, 41 lines)

@@ -0,0 +1,41 @@
---
name: "chatml"

config_file: |
  mmap: true
  template:
    chat_message: |
      <|im_start|>{{ .RoleName }}
      {{- if .FunctionCall }}
      Function call:
      {{- else if eq .RoleName "tool" }}
      Function response:
      {{- end }}
      {{- if .Content}}
      {{.Content }}
      {{- end }}
      {{- if .FunctionCall}}
      {{toJson .FunctionCall}}
      {{- end }}
      <|im_end|>
    function: |
      <|im_start|>system
      You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
      {{range .Functions}}
      {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
      {{end}}
      For each function call return a json object with function name and arguments
      <|im_end|>
      {{.Input -}}
      <|im_start|>assistant
      <tool_call>
    chat: |
      {{.Input -}}
      <|im_start|>assistant
    completion: |
      {{.Input}}
  context_size: 4096
  f16: true
  stopwords:
  - <|im_end|>
  - <dummy32000>
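
To make the template concrete: under this ChatML template, a single user message followed by the assistant cue renders to the text below (the message content is illustrative):

```
<|im_start|>user
What is the weather like in Rome?
<|im_end|>
<|im_start|>assistant
```
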

@@ -31,6 +31,29 @@
  - python
## LLMs
### START LLAMA3
- name: "einstein-v6.1-llama3-8b"
  url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/6468ce47e134d050a58aa89c/5s12oq859qLfDkkTNam_C.png
  urls:
  - https://huggingface.co/Weyaxi/Einstein-v6.1-Llama3-8B
  tags:
  - llm
  - gguf
  - gpu
  - cpu
  - llama3
  license: llama3
  description: |
    This model is a full fine-tuned version of meta-llama/Meta-Llama-3-8B on diverse datasets.

    This model is finetuned using 8xRTX3090 + 1xRTXA6000 using axolotl.
  overrides:
    parameters:
      model: Einstein-v6.1-Llama3-8B-Q4_K_M.gguf
  files:
  - filename: Einstein-v6.1-Llama3-8B-Q4_K_M.gguf
    sha256: 3ef96fd6e32658774b3c8fbc24088787dfa911288e272b186f448c886400d30d
    uri: huggingface://bartowski/Einstein-v6.1-Llama3-8B-GGUF/Einstein-v6.1-Llama3-8B-Q4_K_M.gguf
- &llama3
  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
  name: "llama3-8b-instruct"

@@ -105,6 +128,13 @@
  urls:
  - https://huggingface.co/bartowski/Llama-3-SauerkrautLM-8b-Instruct-GGUF
  icon: https://vago-solutions.ai/wp-content/uploads/2024/04/Llama3-Pic.png
  tags:
  - llm
  - gguf
  - gpu
  - cpu
  - llama3
  - german
  description: |
    SauerkrautLM-llama-3-8B-Instruct

@@ -195,6 +225,23 @@
  - filename: lexi-llama-3-8b-uncensored.Q6_K.gguf
    sha256: 5805f3856cc18a769fae0b7c5659fe6778574691c370c910dad6eeec62c62436
    uri: huggingface://NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF/lexi-llama-3-8b-uncensored.Q6_K.gguf
- <<: *llama3
  name: "llama-3-lewdplay-8b-evo"
  urls:
  - https://huggingface.co/Undi95/Llama-3-LewdPlay-8B-evo-GGUF
  description: |
    This is a merge of pre-trained language models created using mergekit.

    The new EVOLVE merge method was used (on MMLU specifically), see below for more information!

    Unholy was used for uncensoring, Roleplay Llama 3 for the DPO train he got on top, and LewdPlay for the... lewd side.
  overrides:
    parameters:
      model: Llama-3-LewdPlay-8B-evo.q8_0.gguf
  files:
  - filename: Llama-3-LewdPlay-8B-evo.q8_0.gguf
    sha256: 1498152d598ff441f73ec6af9d3535875302e7251042d87feb7e71a3618966e8
    uri: huggingface://Undi95/Llama-3-LewdPlay-8B-evo-GGUF/Llama-3-LewdPlay-8B-evo.q8_0.gguf
- <<: *llama3
  name: "chaos-rp_l3_b-iq-imatrix"
  urls:

@@ -213,6 +260,87 @@
  - filename: Chaos_RP_l3_8B-Q4_K_M-imat.gguf
    sha256: 4273c5a8f23d49bf6294e620a5aa1fcd78d491ea0b90d0ec63ad708eedb83893
    uri: huggingface://Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix/Chaos_RP_l3_8B-Q4_K_M-imat.gguf
- <<: *llama3
  name: "sovl_llama3_8b-gguf-iq-imatrix"
  urls:
  - https://huggingface.co/Lewdiculous/SOVL_Llama3_8B-GGUF-IQ-Imatrix
  icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/N_1D87adbMuMlSIQ5rI3_.png
  description: |
    I'm not gonna tell you this is the best model anyone has ever made. I'm not going to tell you that you will love chatting with SOVL.

    What I am gonna say is thank you for taking the time out of your day. Without users like you, my work would be meaningless.
  overrides:
    parameters:
      model: SOVL_Llama3_8B-Q4_K_M-imat.gguf
  files:
  - filename: SOVL_Llama3_8B-Q4_K_M-imat.gguf
    sha256: ee61890dd26d52985a3c44279d519ca8592448ddeb46387cf22868548703d686
    uri: huggingface://Lewdiculous/SOVL_Llama3_8B-GGUF-IQ-Imatrix/SOVL_Llama3_8B-Q4_K_M-imat.gguf
- <<: *llama3
  name: "l3-solana-8b-v1-gguf"
  url: "github:mudler/LocalAI/gallery/solana.yaml@master"
  license: cc-by-nc-4.0
  urls:
  - https://huggingface.co/Sao10K/L3-Solana-8B-v1-GGUF
  description: |
    A Full Fine-Tune of meta-llama/Meta-Llama-3-8B done with 2x A100 80GB on ~75M Tokens worth of Instruct, and Multi-Turn complex conversations, of up to 8192 tokens long sequence lengths.

    Trained as a generalist instruct model that should be able to handle certain unsavoury topics. It could roleplay too, as a side bonus.
  overrides:
    parameters:
      model: L3-Solana-8B-v1.q5_K_M.gguf
  files:
  - filename: L3-Solana-8B-v1.q5_K_M.gguf
    sha256: 9b8cd2c3beaab5e4f82efd10e7d44f099ad40a4e0ee286ca9fce02c8eec26d2f
    uri: huggingface://Sao10K/L3-Solana-8B-v1-GGUF/L3-Solana-8B-v1.q5_K_M.gguf
- <<: *llama3
  name: "average_normie_l3_v1_8b-gguf-iq-imatrix"
  urls:
  - https://huggingface.co/Lewdiculous/Average_Normie_l3_v1_8B-GGUF-IQ-Imatrix
  icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/dvNIj1rSTjBvgs3XJfqXK.png
  description: |
    A model by an average normie for the average normie.

    This model is a stock merge of the following models:

    https://huggingface.co/cgato/L3-TheSpice-8b-v0.1.3

    https://huggingface.co/Sao10K/L3-Solana-8B-v1

    https://huggingface.co/ResplendentAI/Kei_Llama3_8B

    The final merge then had the following LoRA applied over it:

    https://huggingface.co/ResplendentAI/Theory_of_Mind_Llama3

    This should be an intelligent and adept roleplaying model.
  overrides:
    parameters:
      model: Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf
  files:
  - filename: Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf
    sha256: 9e98cd2672f716a0872912fdc4877969efd14d6f682f28e156f8591591c00d9c
    uri: huggingface://Lewdiculous/Average_Normie_l3_v1_8B-GGUF-IQ-Imatrix/Average_Normie_l3_v1_8B-Q4_K_M-imat.gguf
- <<: *llama3
  name: "llama-3-8b-lexifun-uncensored-v1"
  icon: "https://cdn-uploads.huggingface.co/production/uploads/644ad182f434a6a63b18eee6/GrOs1IPG5EXR3MOCtcQiz.png"
  license: llama3
  urls:
  - https://huggingface.co/Orenguteng/Llama-3-8B-LexiFun-Uncensored-V1-GGUF
  - https://huggingface.co/Orenguteng/LexiFun-Llama-3-8B-Uncensored-V1
  description: |
    This is GGUF version of https://huggingface.co/Orenguteng/LexiFun-Llama-3-8B-Uncensored-V1

    Oh, you want to know who I am? Well, I'm LexiFun, the human equivalent of a chocolate chip cookie - warm, gooey, and guaranteed to make you smile! 🍪 I'm like the friend who always has a witty comeback, a sarcastic remark, and a healthy dose of humor to brighten up even the darkest of days. And by 'healthy dose,' I mean I'm basically a walking pharmacy of laughter. You might need to take a few extra doses to fully recover from my jokes, but trust me, it's worth it! 🏥

    So, what can I do? I can make you laugh so hard you snort your coffee out your nose, I can make you roll your eyes so hard they get stuck that way, and I can make you wonder if I'm secretly a stand-up comedian who forgot their act. 🤣 But seriously, I'm here to spread joy, one sarcastic comment at a time. And if you're lucky, I might even throw in a few dad jokes for good measure! 🤴♂️ Just don't say I didn't warn you. 😏
  overrides:
    parameters:
      model: LexiFun-Llama-3-8B-Uncensored-V1_Q4_K_M.gguf
  files:
  - filename: LexiFun-Llama-3-8B-Uncensored-V1_Q4_K_M.gguf
    sha256: 961a3fb75537d650baf14dce91d40df418ec3d481b51ab2a4f44ffdfd6b5900f
    uri: huggingface://Orenguteng/Llama-3-8B-LexiFun-Uncensored-V1-GGUF/LexiFun-Llama-3-8B-Uncensored-V1_Q4_K_M.gguf
- <<: *llama3
  name: "llama-3-unholy-8b:Q8_0"
  urls:

@@ -278,6 +406,22 @@
  - filename: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf
    sha256: 265ded6a4f439bec160f394e3083a4a20e32ebb9d1d2d85196aaab23dab87fb2
    uri: huggingface://Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix/Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf
- <<: *llama3
  name: "suzume-llama-3-8B-multilingual"
  urls:
  - https://huggingface.co/lightblue/suzume-llama-3-8B-multilingual-gguf
  icon: https://cdn-uploads.huggingface.co/production/uploads/64b63f8ad57e02621dc93c8b/kg3QjQOde0X743csGJT-f.png
  description: |
    This is Suzume 8B, a multilingual finetune of Llama 3.

    Llama 3 has exhibited excellent performance on many English-language benchmarks. However, it also seems to have been finetuned on mostly English data, meaning that it will respond in English even if prompted in other languages.
  overrides:
    parameters:
      model: suzume-llama-3-8B-multilingual-Q4_K_M.gguf
  files:
  - filename: suzume-llama-3-8B-multilingual-Q4_K_M.gguf
    sha256: be197a660e56e51a24a0e0fecd42047d1b24e1423afaafa14769541b331e3269
    uri: huggingface://lightblue/suzume-llama-3-8B-multilingual-gguf/ggml-model-Q4_K_M.gguf
- &dolphin
  name: "dolphin-2.9-llama3-8b"
  url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"

@@ -311,6 +455,25 @@
  - filename: dolphin-2.9-llama3-8b-q6_K.gguf
    sha256: 8aac72a0bd72c075ba7be1aa29945e47b07d39cd16be9a80933935f51b57fb32
    uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q6_K.gguf
- url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  name: "llama-3-8b-instruct-dpo-v0.3-32k"
  license: llama3
  urls:
  - https://huggingface.co/MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.3-32k-GGUF
  tags:
  - llm
  - gguf
  - gpu
  - cpu
  - llama3
  overrides:
    context_size: 32768
    parameters:
      model: Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf
  files:
  - filename: Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf
    sha256: 694c55b5215d03e59626cd4292076eaf31610ef27ba04737166766baa75d889f
    uri: huggingface://MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.3-32k-GGUF/Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf
## LLama2 and derivatives
### Start Fimbulvetr
- &vicuna-chat

@@ -357,6 +520,33 @@
  - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf
    sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1
    uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf
### START Vicuna based
- &wizardlm2
  url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master"
  name: "wizardlm2-7b"
  description: |
    We introduce and open-source WizardLM-2, our next-generation state-of-the-art large language models, which have improved performance on complex chat, multilingual, reasoning and agent tasks. The new family includes three cutting-edge models: WizardLM-2 8x22B, WizardLM-2 70B, and WizardLM-2 7B.

    WizardLM-2 8x22B is our most advanced model and demonstrates highly competitive performance compared to leading proprietary works, consistently outperforming all existing state-of-the-art open-source models.
    WizardLM-2 70B reaches top-tier reasoning capabilities and is the first choice at its size.
    WizardLM-2 7B is the fastest and achieves comparable performance with existing 10x larger open-source leading models.
  icon: https://github.com/nlpxucan/WizardLM/raw/main/imgs/WizardLM.png
  license: apache-2.0
  urls:
  - https://huggingface.co/MaziyarPanahi/WizardLM-2-7B-GGUF
  tags:
  - llm
  - gguf
  - gpu
  - cpu
  - mistral
  overrides:
    parameters:
      model: WizardLM-2-7B.Q4_K_M.gguf
  files:
  - filename: WizardLM-2-7B.Q4_K_M.gguf
    sha256: 613212417701a26fd43f565c5c424a2284d65b1fddb872b53a99ef8add796f64
    uri: huggingface://MaziyarPanahi/WizardLM-2-7B-GGUF/WizardLM-2-7B.Q4_K_M.gguf
### START LLaVa
- &llava
  url: "github:mudler/LocalAI/gallery/llava.yaml@master"

@@ -411,6 +601,32 @@
  - filename: llava-v1.5-7b-mmproj-Q8_0.gguf
    sha256: 09c230de47f6f843e4841656f7895cac52c6e7ec7392acb5e8527de8b775c45a
    uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf
- <<: *llama3
  name: "poppy_porpoise-v0.7-l3-8b-iq-imatrix"
  description: |
    "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences.

    Update: Vision/multimodal capabilities again!
  icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/v6AZmbk-Cb52KskTQTwzW.png
  tags:
  - llm
  - multimodal
  - gguf
  - gpu
  - llama3
  - cpu
  - llava-1.5
  overrides:
    mmproj: Llava_1.5_Llama3_mmproj.gguf
    parameters:
      model: Poppy_Porpoise-v0.7-L3-8B-Q4_K_M-imat.gguf
  files:
  - filename: Poppy_Porpoise-v0.7-L3-8B-Q4_K_M-imat.gguf
    sha256: 04badadd6c88cd9c706efef8f5cd337057c805e43dd440a5936f87720c37eb33
    uri: huggingface://Lewdiculous/Poppy_Porpoise-v0.7-L3-8B-GGUF-IQ-Imatrix/Poppy_Porpoise-v0.7-L3-8B-Q4_K_M-imat.gguf
  - filename: Llava_1.5_Llama3_mmproj.gguf
    sha256: d2a9ca943975f6c49c4d55886e873f676a897cff796e92410ace6c20f4efd03b
    uri: huggingface://ChaoticNeutrals/Llava_1.5_Llama3_mmproj/mmproj-model-f16.gguf
### START Phi-2
- &phi-2-chat
  url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master"

@@ -508,7 +724,7 @@
      model: Phi-3-mini-4k-instruct-q4.gguf
  files:
  - filename: "Phi-3-mini-4k-instruct-q4.gguf"
    sha256: "4fed7364ee3e0c7cb4fe0880148bfdfcd1b630981efa0802a6b62ee52e7da97e"
    sha256: "8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef"
    uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf"
- <<: *phi-3
  name: "phi-3-mini-4k-instruct:fp16"

@@ -519,7 +735,7 @@
  - filename: "Phi-3-mini-4k-instruct-fp16.gguf"
    sha256: "ad9f8ff11cd096115adc8ff50befa22fc3da2718672ddd2ab30faccd70488605"
    uri: "huggingface://microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-fp16.gguf"
### START Hermes-2-Pro-Mistral
### START Hermes
- &hermes-2-pro-mistral
  url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
  name: "hermes-2-pro-mistral"

@@ -571,7 +787,95 @@
  - filename: "Hermes-2-Pro-Mistral-7B.Q8_0.gguf"
    sha256: "b6d95d7ec9a395b7568cc94b0447fd4f90b6f69d6e44794b1fbb84e3f732baca"
    uri: "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q8_0.gguf"
### END Hermes-2-Pro-Mistral
### LLAMA3 version
- <<: *hermes-2-pro-mistral
  name: "hermes-2-pro-llama-3-8b"
  tags:
  - llm
  - gguf
  - gpu
  - llama3
  - cpu
  urls:
  - https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF
  overrides:
    parameters:
      model: Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
  files:
  - filename: "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf"
    sha256: "afe41ab251d1fd9870dd9631f60c22b22c215166308b35d7e15faa3260fa4bd7"
    uri: "huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf"
- <<: *hermes-2-pro-mistral
  tags:
  - llm
  - gguf
  - gpu
  - llama3
  - cpu
  name: "hermes-2-pro-llama-3-8b:Q5_K_M"
  urls:
  - https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF
  overrides:
    parameters:
      model: Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf
  files:
  - filename: "Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf"
    sha256: "2be39d775b2a64aa5bbdc1f96fa1703ec54b5fa8982c1732b7ae9d2b57c6bb43"
    uri: "huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q5_K_M.gguf"
- <<: *hermes-2-pro-mistral
  tags:
  - llm
  - gguf
  - gpu
  - llama3
  - cpu
  name: "hermes-2-pro-llama-3-8b:Q8_0"
  urls:
  - https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF
  overrides:
    parameters:
      model: Hermes-2-Pro-Llama-3-8B-Q8_0.gguf
  files:
  - filename: "Hermes-2-Pro-Llama-3-8B-Q8_0.gguf"
    sha256: "0a8f471d6940dee972e579eebdb4d536174bda82b73463cd8ac7752a7b1973a3"
    uri: "huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q8_0.gguf"
- <<: *hermes-2-pro-mistral
  name: "biomistral-7b"
  description: |
    BioMistral: A Collection of Open-Source Pretrained Large Language Models for Medical Domains
  urls:
  - https://huggingface.co/MaziyarPanahi/BioMistral-7B-GGUF
  icon: https://huggingface.co/BioMistral/BioMistral-7B/resolve/main/wordart_blue_m_rectangle.png?download=true
  overrides:
    parameters:
      model: BioMistral-7B.Q4_K_M.gguf
  files:
  - filename: "BioMistral-7B.Q4_K_M.gguf"
    sha256: "3a73107045dfe7e3f113b392b0a67e3e6ca9fa9dae2abe301424ce5abd1721a6"
    uri: "huggingface://MaziyarPanahi/BioMistral-7B-GGUF/BioMistral-7B.Q4_K_M.gguf"
- url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  name: "guillaumetell-7b"
  license: apache-2
  description: |
    Guillaume Tell is a French Large Language Model (LLM) based on Mistral Open-Hermes 2.5, optimized for RAG (Retrieval Augmented Generation) with source traceability and explainability.
  urls:
  - https://huggingface.co/MaziyarPanahi/guillaumetell-7b-GGUF
  - https://huggingface.co/AgentPublic/guillaumetell-7b
  tags:
  - llm
  - gguf
  - gpu
  - cpu
  - openhermes
  - french
  overrides:
    context_size: 4096
    parameters:
      model: guillaumetell-7b.Q4_K_M.gguf
  files:
  - filename: guillaumetell-7b.Q4_K_M.gguf
    sha256: bf08db5281619335f3ee87e229c8533b04262790063b061bb8f275c3e4de7061
    uri: huggingface://MaziyarPanahi/guillaumetell-7b-GGUF/guillaumetell-7b.Q4_K_M.gguf
### START Cerbero
- url: "github:mudler/LocalAI/gallery/cerbero.yaml@master"
  icon: https://huggingface.co/galatolo/cerbero-7b/resolve/main/README.md.d/cerbero.png

@@ -585,6 +889,7 @@
  - gpu
  - cpu
  - mistral
  - italian
  overrides:
    parameters:
      model: galatolo-Q4_K.gguf

@@ -618,7 +923,51 @@
  - filename: "codellama-7b.Q4_0.gguf"
    sha256: "33052f6dd41436db2f83bd48017b6fff8ce0184e15a8a227368b4230f1da97b5"
    uri: "huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_0.gguf"

### START OpenVINO
- &openvino
  url: "github:mudler/LocalAI/gallery/openvino.yaml@master"
  name: "openvino-llama-3-8b-instruct-ov-int8"
  license: llama3
  urls:
  - https://huggingface.co/fakezeta/llama-3-8b-instruct-ov-int8
  overrides:
    parameters:
      model: fakezeta/llama-3-8b-instruct-ov-int8
    stopwords:
    - "<|eot_id|>"
    - "<|end_of_text|>"
  tags:
  - llm
  - openvino
  - gpu
  - llama3
  - cpu
- <<: *openvino
  name: "openvino-phi3"
  urls:
  - https://huggingface.co/fakezeta/Phi-3-mini-128k-instruct-ov-int8
  overrides:
    context_size: 131072
    parameters:
      model: fakezeta/Phi-3-mini-128k-instruct-ov-int8
    stopwords:
    - <|end|>
- <<: *openvino
  name: "openvino-starling-lm-7b-beta-openvino-int8"
  urls:
  - https://huggingface.co/fakezeta/Starling-LM-7B-beta-openvino-int8
  overrides:
    context_size: 8192
    parameters:
      model: fakezeta/Starling-LM-7B-beta-openvino-int8
- <<: *openvino
  name: "openvino-wizardlm2"
  urls:
  - https://huggingface.co/fakezeta/Not-WizardLM-2-7B-ov-int8
  overrides:
    context_size: 8192
    parameters:
      model: fakezeta/Not-WizardLM-2-7B-ov-int8
### START Embeddings
- &sentencentransformers
  description: |

@@ -41,3 +41,4 @@ config_file: |
  - <|im_end|>
  - <dummy32000>
  - "<|eot_id|>"
  - <|end_of_text|>
gallery/openvino.yaml (new file, 12 lines)

@@ -0,0 +1,12 @@
---
name: openvino

config_file: |
  backend: transformers
  context_size: 8192
  type: OVModelForCausalLM
  template:
    use_tokenizer_template: true
  stopwords:
  - "<|eot_id|>"
  - "<|end_of_text|>"

gallery/wizardlm2.yaml (new file, 15 lines)

@@ -0,0 +1,15 @@
---
name: "wizardlm2"

config_file: |
  mmap: true
  template:
    chat_message: |-
      {{if eq .RoleName "assistant"}}ASSISTANT: {{.Content}}</s>{{else if eq .RoleName "system"}}{{.Content}}{{else if eq .RoleName "user"}}USER: {{.Content}}{{end}}
    chat: "{{.Input}}ASSISTANT: "
    completion: |-
      {{.Input}}
  context_size: 32768
  f16: true
  stopwords:
  - </s>
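
For illustration, and assuming messages are joined with newlines, a system prompt plus one user turn renders under this Vicuna-style template as:

```
You are a helpful assistant.
USER: What is the capital of Italy?
ASSISTANT: 
```
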

@@ -2,6 +2,8 @@ package functions

import (
    "encoding/json"

    "github.com/rs/zerolog/log"
)

type Function struct {

@@ -30,8 +32,14 @@ func (f Functions) ToJSONStructure() JSONFunctionStructure {
        prop := map[string]interface{}{}
        defsD := map[string]interface{}{}

        json.Unmarshal(dat, &prop)
        json.Unmarshal(dat2, &defsD)
        err := json.Unmarshal(dat, &prop)
        if err != nil {
            log.Error().Err(err).Msg("error unmarshalling dat")
        }
        err = json.Unmarshal(dat2, &defsD)
        if err != nil {
            log.Error().Err(err).Msg("error unmarshalling dat2")
        }
        if js.Defs == nil {
            js.Defs = defsD
        }
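
The pattern this hunk introduces (decode into a map and log, rather than abort on, a malformed fragment) can be sketched standalone; the `decodeLoose` helper below is hypothetical and not part of the codebase:

```go
package main

import (
    "encoding/json"
    "log"
)

// decodeLoose unmarshals data into out and logs (instead of returning)
// any error, mirroring the tolerant decoding used above: one malformed
// schema fragment should not abort building the whole JSON structure.
func decodeLoose(data []byte, out any, what string) {
    if err := json.Unmarshal(data, out); err != nil {
        log.Printf("error unmarshalling %s: %v", what, err)
    }
}

func main() {
    prop := map[string]interface{}{}
    decodeLoose([]byte(`{"type":"object"}`), &prop, "dat")
    decodeLoose([]byte(`not json`), &prop, "dat2") // logged, not fatal
    log.Printf("prop=%v", prop)
}
```
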

@@ -59,7 +59,10 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC
    if multipleResults {
        ss := []map[string]interface{}{}
        s := utils.EscapeNewLines(llmresult)
        json.Unmarshal([]byte(s), &ss)
        err := json.Unmarshal([]byte(s), &ss)
        if err != nil {
            log.Error().Err(err).Str("escapedLLMResult", s).Msg("multiple results: unable to unmarshal llm result")
        }
        log.Debug().Msgf("Function return: %s %+v", s, ss)

        for _, s := range ss {

@@ -83,7 +86,10 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC
        ss := map[string]interface{}{}
        // This prevents newlines from breaking JSON parsing for clients
        s := utils.EscapeNewLines(llmresult)
        json.Unmarshal([]byte(s), &ss)
        err := json.Unmarshal([]byte(s), &ss)
        if err != nil {
            log.Error().Err(err).Str("escapedLLMResult", s).Msg("unable to unmarshal llm result")
        }
        log.Debug().Msgf("Function return: %s %+v", s, ss)

        // The grammar defines the function name as "function", while OpenAI returns "name"

@@ -1,6 +1,7 @@
package gallery

import (
    "errors"
    "fmt"
    "os"
    "path/filepath"

@@ -184,3 +185,48 @@ func getGalleryModels(gallery Gallery, basePath string) ([]*GalleryModel, error)
    }
    return models, nil
}

func DeleteModelFromSystem(basePath string, name string, additionalFiles []string) error {
    // os.PathSeparator is not allowed in model names. Replace them with "__" to avoid conflicts with file paths.
    name = strings.ReplaceAll(name, string(os.PathSeparator), "__")

    configFile := filepath.Join(basePath, fmt.Sprintf("%s.yaml", name))

    galleryFile := filepath.Join(basePath, galleryFileName(name))

    var err error
    // Delete all the files associated to the model
    // read the model config
    galleryconfig, err := ReadConfigFile(galleryFile)
    if err != nil {
        log.Error().Err(err).Msgf("failed to read gallery file %s", configFile)
    }

    // Remove additional files
    if galleryconfig != nil {
        for _, f := range galleryconfig.Files {
            fullPath := filepath.Join(basePath, f.Filename)
            log.Debug().Msgf("Removing file %s", fullPath)
            if e := os.Remove(fullPath); e != nil {
                err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f.Filename, e))
            }
        }
    }

    for _, f := range additionalFiles {
        fullPath := filepath.Join(filepath.Join(basePath, f))
        log.Debug().Msgf("Removing additional file %s", fullPath)
        if e := os.Remove(fullPath); e != nil {
            err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e))
        }
    }

    log.Debug().Msgf("Removing model config file %s", configFile)

    // Delete the model config file
    if e := os.Remove(configFile); e != nil {
        err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", configFile, e))
    }

    return err
}
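
DeleteModelFromSystem deliberately keeps going after individual failures and reports them all at once via `errors.Join` (Go 1.20+). A minimal sketch of that accumulation pattern, with illustrative paths:

```go
package main

import (
    "errors"
    "fmt"
    "os"
)

// removeAll attempts every removal and joins the failures instead of
// stopping at the first error, mirroring DeleteModelFromSystem above.
func removeAll(paths []string) error {
    var err error
    for _, p := range paths {
        if e := os.Remove(p); e != nil {
            err = errors.Join(err, fmt.Errorf("failed to remove %s: %w", p, e))
        }
    }
    return err
}

func main() {
    err := removeAll([]string{"/tmp/missing-a", "/tmp/missing-b"})
    if err != nil {
        fmt.Println(err) // both failures are reported together
    }
}
```
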

@@ -1,6 +1,7 @@
package gallery_test

import (
    "os"
    "testing"

    . "github.com/onsi/ginkgo/v2"

@@ -11,3 +12,9 @@ func TestGallery(t *testing.T) {
    RegisterFailHandler(Fail)
    RunSpecs(t, "Gallery test suite")
}

var _ = BeforeSuite(func() {
    if os.Getenv("FIXTURES") == "" {
        Fail("FIXTURES env var not set")
    }
})

@@ -178,5 +178,20 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
        log.Debug().Msgf("Written config file %s", configFilePath)
    }

    return nil
    // Save the model gallery file for further reference
    modelFile := filepath.Join(basePath, galleryFileName(name))
    data, err := yaml.Marshal(config)
    if err != nil {
        return err
    }

    log.Debug().Msgf("Written gallery file %s", modelFile)

    return os.WriteFile(modelFile, data, 0600)

    //return nil
}

func galleryFileName(name string) string {
    return "._gallery_" + name + ".yaml"
}

@@ -1,6 +1,7 @@
package gallery_test

import (
    "errors"
    "os"
    "path/filepath"

@@ -11,6 +12,7 @@ import (
)

var _ = Describe("Model test", func() {

    Context("Downloading", func() {
        It("applies model correctly", func() {
            tempdir, err := os.MkdirTemp("", "test")

@@ -80,6 +82,19 @@ var _ = Describe("Model test", func() {
            Expect(err).ToNot(HaveOccurred())
            Expect(len(models)).To(Equal(1))
            Expect(models[0].Installed).To(BeTrue())

            // delete
            err = DeleteModelFromSystem(tempdir, "bert", []string{})
            Expect(err).ToNot(HaveOccurred())

            models, err = AvailableGalleryModels(galleries, tempdir)
            Expect(err).ToNot(HaveOccurred())
            Expect(len(models)).To(Equal(1))
            Expect(models[0].Installed).To(BeFalse())

            _, err = os.Stat(filepath.Join(tempdir, "bert.yaml"))
            Expect(err).To(HaveOccurred())
            Expect(errors.Is(err, os.ErrNotExist)).To(BeTrue())
        })

        It("renames model correctly", func() {

@@ -4,12 +4,14 @@ type GalleryOp struct {
    Id          string
    GalleryName string
    ConfigURL   string
    Delete      bool

    Req       GalleryModel
    Galleries []Gallery
}

type GalleryOpStatus struct {
    Deletion  bool   `json:"deletion"` // Deletion is true if the operation is a deletion
    FileName  string `json:"file_name"`
    Error     error  `json:"error"`
    Processed bool   `json:"processed"`

@@ -41,7 +41,7 @@ type Backend interface {
    PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error
    GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
    TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
    AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error)
    AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error)
    TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
    Status(ctx context.Context) (*pb.StatusResponse, error)

@@ -53,8 +53,8 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error {
    return fmt.Errorf("unimplemented")
}

func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) {
    return schema.Result{}, fmt.Errorf("unimplemented")
func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) {
    return schema.TranscriptionResult{}, fmt.Errorf("unimplemented")
}

func (llm *Base) TTS(*pb.TTSRequest) error {

@@ -210,7 +210,7 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp
    return client.TTS(ctx, in, opts...)
}

func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) {
func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) {
    if !c.parallel {
        c.opMutex.Lock()
        defer c.opMutex.Unlock()

@@ -231,7 +231,7 @@ func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptReques
    if err != nil {
        return nil, err
    }
    tresult := &schema.Result{}
    tresult := &schema.TranscriptionResult{}
    for _, s := range res.Segments {
        tks := []int{}
        for _, t := range s.Tokens {

@@ -53,12 +53,12 @@ func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.
    return e.s.TTS(ctx, in)
}

func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) {
func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) {
    r, err := e.s.AudioTranscription(ctx, in)
    if err != nil {
        return nil, err
    }
    tr := &schema.Result{}
    tr := &schema.TranscriptionResult{}
    for _, s := range r.Segments {
        var tks []int
        for _, t := range s.Tokens {

@@ -15,7 +15,7 @@ type LLM interface {
    Load(*pb.ModelOptions) error
    Embeddings(*pb.PredictOptions) ([]float32, error)
    GenerateImage(*pb.GenerateImageRequest) error
    AudioTranscription(*pb.TranscriptRequest) (schema.Result, error)
    AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error)
    TTS(*pb.TTSRequest) error
    TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error)
    Status() (pb.StatusResponse, error)

@@ -70,7 +70,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
    // If no specific model path is set for transformers/HF, set it to the model path
    for _, env := range []string{"HF_HOME", "TRANSFORMERS_CACHE", "HUGGINGFACE_HUB_CACHE"} {
        if os.Getenv(env) == "" {
            os.Setenv(env, ml.ModelPath)
            err := os.Setenv(env, ml.ModelPath)
            if err != nil {
                log.Error().Err(err).Str("name", env).Str("modelPath", ml.ModelPath).Msg("unable to set environment variable to modelPath")
            }
        }
    }

@@ -184,8 +187,13 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
    if o.singleActiveBackend {
        ml.mu.Lock()
        log.Debug().Msgf("Stopping all backends except '%s'", o.model)
        ml.StopAllExcept(o.model)
        err := ml.StopAllExcept(o.model)
        ml.mu.Unlock()
        if err != nil {
            log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel")
            return nil, err
        }

    }

    var backendToConsume string

@@ -224,7 +232,10 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
    // If we can have only one backend active, kill all the others (except external backends)
    if o.singleActiveBackend {
        log.Debug().Msgf("Stopping all backends except '%s'", o.model)
        ml.StopAllExcept(o.model)
        err := ml.StopAllExcept(o.model)
        if err != nil {
            log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel - greedyloader continuing")
        }
    }
    ml.mu.Unlock()

@@ -96,7 +96,13 @@ func (ml *ModelLoader) ListModels() ([]string, error) {
    models := []string{}
    for _, file := range files {
        // Skip templates, YAML, .keep, .json, and .DS_Store files - TODO: as this list grows, is there a more efficient method?
        if strings.HasSuffix(file.Name(), ".tmpl") || strings.HasSuffix(file.Name(), ".keep") || strings.HasSuffix(file.Name(), ".yaml") || strings.HasSuffix(file.Name(), ".yml") || strings.HasSuffix(file.Name(), ".json") || strings.HasSuffix(file.Name(), ".DS_Store") {
        if strings.HasSuffix(file.Name(), ".tmpl") ||
            strings.HasSuffix(file.Name(), ".keep") ||
            strings.HasSuffix(file.Name(), ".yaml") ||
            strings.HasSuffix(file.Name(), ".yml") ||
            strings.HasSuffix(file.Name(), ".json") ||
            strings.HasSuffix(file.Name(), ".DS_Store") ||
            strings.HasPrefix(file.Name(), ".") {
            continue
        }

@@ -168,7 +174,10 @@ func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
        if !ml.grpcProcesses[s].IsAlive() {
            log.Debug().Msgf("GRPC Process is not responding: %s", s)
            // stop and delete the process; this forces the model to be re-loaded and the service to be re-created
            ml.deleteProcess(s)
            err := ml.deleteProcess(s)
            if err != nil {
                log.Error().Err(err).Str("process", s).Msg("error stopping process")
            }
            return ""
        }
    }

@@ -1,6 +1,7 @@
package model

import (
    "errors"
    "fmt"
    "os"
    "os/signal"

@@ -14,8 +15,8 @@ import (
    "github.com/rs/zerolog/log"
)

func (ml *ModelLoader) StopAllExcept(s string) {
    ml.StopGRPC(func(id string, p *process.Process) bool {
func (ml *ModelLoader) StopAllExcept(s string) error {
    return ml.StopGRPC(func(id string, p *process.Process) bool {
        if id != s {
            for ml.models[id].GRPC(false, ml.wd).IsBusy() {
                log.Debug().Msgf("%s busy. Waiting.", id)

@@ -43,16 +44,19 @@ func includeAllProcesses(_ string, _ *process.Process) bool {
    return true
}

func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) {
func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error {
    var err error = nil
    for k, p := range ml.grpcProcesses {
        if filter(k, p) {
            ml.deleteProcess(k)
            e := ml.deleteProcess(k)
            err = errors.Join(err, e)
        }
    }
    return err
}

func (ml *ModelLoader) StopAllGRPC() {
    ml.StopGRPC(includeAllProcesses)
func (ml *ModelLoader) StopAllGRPC() error {
    return ml.StopGRPC(includeAllProcesses)
}

func (ml *ModelLoader) GetGRPCPID(id string) (int, error) {

pkg/utils/base64.go (new file, 50 lines)

@@ -0,0 +1,50 @@
package utils

import (
    "encoding/base64"
    "fmt"
    "io"
    "net/http"
    "strings"
    "time"
)

var base64DownloadClient http.Client = http.Client{
    Timeout: 30 * time.Second,
}

// this function checks if the string is a URL; if it is, it downloads the image in memory,
// encodes it in base64, and returns the base64 string

// This may look weird down in pkg/utils while it is currently only used in core/config
//
// but I believe it may be useful for MQTT as well in the near future, so I'm
// extracting it while I'm thinking of it.
func GetImageURLAsBase64(s string) (string, error) {
    if strings.HasPrefix(s, "http") {
        // download the image
        resp, err := base64DownloadClient.Get(s)
        if err != nil {
            return "", err
        }
        defer resp.Body.Close()

        // read the image data into memory
        data, err := io.ReadAll(resp.Body)
        if err != nil {
            return "", err
        }

        // encode the image data in base64
        encoded := base64.StdEncoding.EncodeToString(data)

        // return the base64 string
        return encoded, nil
    }

    // if the string instead is prefixed with "data:image/jpeg;base64,", drop the prefix
    if strings.HasPrefix(s, "data:image/jpeg;base64,") {
        return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
    }
    return "", fmt.Errorf("not valid string")
}
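
A quick usage sketch for the helper above; the import path is assumed from the repository layout, so adjust it to the actual module path:

```go
package main

import (
    "fmt"

    "github.com/mudler/LocalAI/pkg/utils" // assumed module path
)

func main() {
    // URL input: the image is downloaded (30s timeout) and base64-encoded.
    b64, err := utils.GetImageURLAsBase64("https://example.com/image.jpg")
    if err != nil {
        fmt.Println("download failed:", err)
        return
    }
    fmt.Println(len(b64), "base64 characters")

    // Data-URI input: the jpeg prefix is stripped and the payload returned as-is.
    b64, _ = utils.GetImageURLAsBase64("data:image/jpeg;base64,/9j/4AAQ")
    fmt.Println(b64) // "/9j/4AAQ"
}
```
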

Some files were not shown because too many files have changed in this diff.