mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 03:02:38 -05:00
Compare commits
53 Commits
v1.30.0
...
renovate/g
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
82eac07018 | ||
|
|
c62504ac92 | ||
|
|
f227e918f9 | ||
|
|
c132dbadce | ||
|
|
b839eb80a1 | ||
|
|
23b03a7f03 | ||
|
|
9196583651 | ||
|
|
fd28252e55 | ||
|
|
94f20e2eb7 | ||
|
|
5ced99a8e7 | ||
|
|
c377e61ff0 | ||
|
|
a6fe0a020a | ||
|
|
bf2ed3d752 | ||
|
|
d17a92eef3 | ||
|
|
1a7be035d3 | ||
|
|
004baaa30f | ||
|
|
ef19268418 | ||
|
|
e82470341f | ||
|
|
88fa42de75 | ||
|
|
432513c3ba | ||
|
|
45370c212b | ||
|
|
e91f660eb1 | ||
|
|
3f3162e57c | ||
|
|
208d1fce58 | ||
|
|
128694213f | ||
|
|
8034ed3473 | ||
|
|
d22069c59e | ||
|
|
5a04d32b39 | ||
|
|
ab65f3a17d | ||
|
|
4e23cbebcf | ||
|
|
63418c1afc | ||
|
|
8ca671761a | ||
|
|
81a5ed9f31 | ||
|
|
528b9d9206 | ||
|
|
1a4c57fac2 | ||
|
|
44a7045732 | ||
|
|
8ac7186185 | ||
|
|
975387f7ae | ||
|
|
d793b5af5e | ||
|
|
5188776224 | ||
|
|
07249c0446 | ||
|
|
188301f403 | ||
|
|
e660721a0c | ||
|
|
e029cc66bc | ||
|
|
e34b5f0119 | ||
|
|
c223364816 | ||
|
|
74fd5844ca | ||
|
|
4ebc86df84 | ||
|
|
8cd03eff58 | ||
|
|
46660a16a0 | ||
|
|
27b097309e | ||
|
|
d0fa1f8e94 | ||
|
|
55e38fea0e |
5
.github/workflows/bump_deps.yaml
vendored
5
.github/workflows/bump_deps.yaml
vendored
@@ -12,6 +12,9 @@ jobs:
|
||||
- repository: "go-skynet/go-llama.cpp"
|
||||
variable: "GOLLAMA_VERSION"
|
||||
branch: "master"
|
||||
- repository: "ggerganov/llama.cpp"
|
||||
variable: "CPPLLAMA_VERSION"
|
||||
branch: "master"
|
||||
- repository: "go-skynet/go-ggml-transformers.cpp"
|
||||
variable: "GOGGMLTRANSFORMERS_VERSION"
|
||||
branch: "master"
|
||||
@@ -41,7 +44,7 @@ jobs:
|
||||
branch: "master"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Bump dependencies 🔧
|
||||
run: |
|
||||
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
||||
|
||||
11
.github/workflows/image.yml
vendored
11
.github/workflows/image.yml
vendored
@@ -19,7 +19,8 @@ jobs:
|
||||
matrix:
|
||||
include:
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
#platforms: 'linux/amd64,linux/arm64'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: ''
|
||||
ffmpeg: ''
|
||||
@@ -38,7 +39,7 @@ jobs:
|
||||
tag-suffix: '-cublas-cuda12'
|
||||
ffmpeg: ''
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
@@ -82,6 +83,10 @@ jobs:
|
||||
sudo apt-get remove -y azure-cli || true
|
||||
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
||||
sudo apt-get remove -y '^gfortran-.*' || true
|
||||
sudo apt-get remove -y microsoft-edge-stable || true
|
||||
sudo apt-get remove -y firefox || true
|
||||
sudo apt-get remove -y powershell || true
|
||||
sudo apt-get remove -y r-base-core || true
|
||||
sudo apt-get autoremove -y
|
||||
sudo apt-get clean
|
||||
echo
|
||||
@@ -92,7 +97,7 @@ jobs:
|
||||
sudo rm -rfv build || true
|
||||
df -h
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
|
||||
18
.github/workflows/release.yaml
vendored
18
.github/workflows/release.yaml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- uses: actions/setup-go@v4
|
||||
@@ -29,6 +29,12 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make -j12 install
|
||||
|
||||
- name: Build
|
||||
id: build
|
||||
env:
|
||||
@@ -60,18 +66,26 @@ jobs:
|
||||
runs-on: macOS-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: '>=1.21.0'
|
||||
- name: Dependencies
|
||||
run: |
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && make -j12 install && rm -rf grpc
|
||||
- name: Build
|
||||
id: build
|
||||
env:
|
||||
CMAKE_ARGS: "${{ matrix.defines }}"
|
||||
BUILD_ID: "${{ matrix.build }}"
|
||||
run: |
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||
make dist
|
||||
- uses: actions/upload-artifact@v3
|
||||
with:
|
||||
|
||||
63
.github/workflows/test-gpu.yml
vendored
Normal file
63
.github/workflows/test-gpu.yml
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
---
|
||||
name: 'GPU tests'
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
concurrency:
|
||||
group: ci-gpu-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
ubuntu-latest:
|
||||
runs-on: self-hosted
|
||||
strategy:
|
||||
matrix:
|
||||
go-version: ['1.21.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: ${{ matrix.go-version }}
|
||||
# You can test your matrix by printing the current Go version
|
||||
- name: Display Go version
|
||||
run: go version
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y make wget
|
||||
- name: Build
|
||||
run: |
|
||||
if [ ! -e /run/systemd/system ]; then
|
||||
sudo mkdir /run/systemd/system
|
||||
fi
|
||||
sudo mkdir -p /host/tests/${{ github.head_ref || github.ref }}
|
||||
sudo chmod -R 777 /host/tests/${{ github.head_ref || github.ref }}
|
||||
make \
|
||||
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
|
||||
BUILD_TYPE=cublas \
|
||||
prepare-e2e run-e2e-image test-e2e
|
||||
- name: Release space from worker ♻
|
||||
if: always()
|
||||
run: |
|
||||
sudo rm -rf build || true
|
||||
sudo rm -rf bin || true
|
||||
sudo rm -rf dist || true
|
||||
sudo docker logs $(sudo docker ps -q --filter ancestor=localai-tests) > logs.txt
|
||||
sudo cat logs.txt || true
|
||||
sudo rm -rf logs.txt
|
||||
make clean || true
|
||||
make \
|
||||
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
|
||||
teardown-e2e || true
|
||||
sudo rm -rf /host/tests/${{ github.head_ref || github.ref }}
|
||||
docker system prune -f -a --volumes || true
|
||||
56
.github/workflows/test.yml
vendored
56
.github/workflows/test.yml
vendored
@@ -20,8 +20,40 @@ jobs:
|
||||
matrix:
|
||||
go-version: ['1.21.x']
|
||||
steps:
|
||||
- name: Release space from worker
|
||||
run: |
|
||||
echo "Listing top largest packages"
|
||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||
head -n 30 <<< "${pkgs}"
|
||||
echo
|
||||
df -h
|
||||
echo
|
||||
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
||||
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
||||
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
||||
sudo rm -rf /usr/local/lib/android
|
||||
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
||||
sudo rm -rf /usr/share/dotnet
|
||||
sudo apt-get remove -y '^mono-.*' || true
|
||||
sudo apt-get remove -y '^ghc-.*' || true
|
||||
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
||||
sudo apt-get remove -y 'php.*' || true
|
||||
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
||||
sudo apt-get remove -y '^google-.*' || true
|
||||
sudo apt-get remove -y azure-cli || true
|
||||
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
||||
sudo apt-get remove -y '^gfortran-.*' || true
|
||||
sudo apt-get autoremove -y
|
||||
sudo apt-get clean
|
||||
echo
|
||||
echo "Listing top largest packages"
|
||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||
head -n 30 <<< "${pkgs}"
|
||||
echo
|
||||
sudo rm -rfv build || true
|
||||
df -h
|
||||
- name: Clone
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
@@ -35,11 +67,15 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
|
||||
|
||||
sudo apt-get install -y ca-certificates cmake curl patch
|
||||
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
sudo pip install -r extra/requirements.txt
|
||||
|
||||
|
||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
||||
GO_TAGS="tts stablediffusion" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
|
||||
sudo mkdir /build && sudo chmod -R 777 /build && cd /build && \
|
||||
curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v1.11.0.tar.gz" | \
|
||||
tar -xzvf - && \
|
||||
@@ -55,6 +91,12 @@ jobs:
|
||||
sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
|
||||
sudo ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
|
||||
sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
|
||||
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make -j12 install
|
||||
|
||||
- name: Test
|
||||
run: |
|
||||
ESPEAK_DATA="/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data" GO_TAGS="tts stablediffusion" make test
|
||||
@@ -66,7 +108,7 @@ jobs:
|
||||
go-version: ['1.21.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
@@ -76,6 +118,14 @@ jobs:
|
||||
# You can test your matrix by printing the current Go version
|
||||
- name: Display Go version
|
||||
run: go version
|
||||
- name: Dependencies
|
||||
run: |
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && make -j12 install && rm -rf grpc
|
||||
- name: Test
|
||||
run: |
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||
CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
|
||||
101
Dockerfile
101
Dockerfile
@@ -1,6 +1,9 @@
|
||||
ARG GO_VERSION=1.21-bullseye
|
||||
ARG IMAGE_TYPE=extras
|
||||
# extras or core
|
||||
|
||||
FROM golang:$GO_VERSION as requirements
|
||||
|
||||
FROM golang:$GO_VERSION as requirements-core
|
||||
|
||||
ARG BUILD_TYPE
|
||||
ARG CUDA_MAJOR_VERSION=11
|
||||
@@ -16,7 +19,8 @@ ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/i
|
||||
ARG GO_TAGS="stablediffusion tts"
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y ca-certificates cmake curl patch pip
|
||||
apt-get install -y ca-certificates curl patch pip cmake && apt-get clean
|
||||
|
||||
|
||||
# Use the variables in subsequent instructions
|
||||
RUN echo "Target Architecture: $TARGETARCH"
|
||||
@@ -30,36 +34,21 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
||||
dpkg -i cuda-keyring_1.0-1_all.deb && \
|
||||
rm -f cuda-keyring_1.0-1_all.deb && \
|
||||
apt-get update && \
|
||||
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
|
||||
; fi
|
||||
ENV PATH /usr/local/cuda/bin:${PATH}
|
||||
|
||||
# Extras requirements
|
||||
COPY extra/requirements.txt /build/extra/requirements.txt
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
RUN pip install --upgrade pip
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
RUN if [ "${TARGETARCH}" = "amd64" ]; then \
|
||||
pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
|
||||
fi
|
||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
|
||||
pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
|
||||
fi
|
||||
RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
|
||||
# OpenBLAS requirements and stable diffusion
|
||||
RUN apt-get install -y \
|
||||
libopenblas-dev \
|
||||
libopencv-dev \
|
||||
&& apt-get clean
|
||||
|
||||
# Vall-e-X
|
||||
RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt
|
||||
# Set up OpenCV
|
||||
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
# OpenBLAS requirements
|
||||
RUN apt-get install -y libopenblas-dev
|
||||
|
||||
# Stable Diffusion requirements
|
||||
RUN apt-get install -y libopencv-dev && \
|
||||
ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
|
||||
# piper requirements
|
||||
# Use pre-compiled Piper phonemization library (includes onnxruntime)
|
||||
#RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
|
||||
@@ -79,17 +68,39 @@ RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSIO
|
||||
tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
|
||||
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
|
||||
ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
|
||||
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
|
||||
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/ && \
|
||||
rm spdlog-${SPDLOG_VERSION} -rf
|
||||
|
||||
# Extras requirements
|
||||
FROM requirements-core as requirements-extras
|
||||
|
||||
COPY extra/requirements.txt /build/extra/requirements.txt
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
RUN pip install --upgrade pip
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
RUN if [ "${TARGETARCH}" = "amd64" ]; then \
|
||||
pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
|
||||
fi
|
||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
|
||||
pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
|
||||
fi
|
||||
RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
|
||||
|
||||
# Vall-e-X
|
||||
RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt
|
||||
|
||||
# \
|
||||
# ; fi
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
FROM requirements as builder
|
||||
FROM requirements-${IMAGE_TYPE} as builder
|
||||
|
||||
ARG GO_TAGS="stablediffusion tts"
|
||||
|
||||
ARG GRPC_BACKENDS
|
||||
ARG BUILD_GRPC=true
|
||||
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
||||
ENV GO_TAGS=${GO_TAGS}
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||
@@ -104,12 +115,23 @@ RUN make prepare
|
||||
COPY . .
|
||||
COPY .git .
|
||||
|
||||
# stablediffusion does not tolerate a newer version of abseil, build it first
|
||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
|
||||
RUN if [ "${BUILD_GRPC}" = "true" ]; then \
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && make -j12 install && rm -rf grpc \
|
||||
; fi
|
||||
|
||||
# Rebuild with default backends
|
||||
RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
FROM requirements
|
||||
FROM requirements-${IMAGE_TYPE}
|
||||
|
||||
ARG FFMPEG
|
||||
ARG BUILD_TYPE
|
||||
@@ -119,6 +141,11 @@ ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
ENV REBUILD=false
|
||||
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
||||
|
||||
ARG CUDA_MAJOR_VERSION=11
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
|
||||
# Add FFmpeg
|
||||
RUN if [ "${FFMPEG}" = "true" ]; then \
|
||||
apt-get install -y ffmpeg \
|
||||
@@ -132,15 +159,23 @@ WORKDIR /build
|
||||
# https://github.com/go-skynet/LocalAI/pull/434
|
||||
COPY . .
|
||||
RUN make prepare-sources
|
||||
|
||||
# Copy the binary
|
||||
COPY --from=builder /build/local-ai ./
|
||||
|
||||
# Copy VALLE-X as it's not a real "lib"
|
||||
RUN cp -rfv /usr/lib/vall-e-x/* ./
|
||||
# do not let stablediffusion rebuild (requires an older version of absl)
|
||||
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
||||
|
||||
# To resolve exllama import error
|
||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH:-$(go env GOARCH)}" = "amd64" ]; then \
|
||||
# Copy VALLE-X as it's not a real "lib"
|
||||
RUN if [ -d /usr/lib/vall-e-x ]; then \
|
||||
cp -rfv /usr/lib/vall-e-x/* ./ ; \
|
||||
fi
|
||||
|
||||
# we also copy exllama libs over to resolve exllama import error
|
||||
RUN if [ -d /usr/local/lib/python3.9/dist-packages/exllama ]; then \
|
||||
cp -rfv /usr/local/lib/python3.9/dist-packages/exllama extra/grpc/exllama/;\
|
||||
fi
|
||||
|
||||
# Define the health check command
|
||||
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
||||
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
|
||||
|
||||
114
Makefile
114
Makefile
@@ -4,10 +4,12 @@ GOVET=$(GOCMD) vet
|
||||
BINARY_NAME=local-ai
|
||||
|
||||
# llama.cpp versions
|
||||
GOLLAMA_VERSION?=d9f6176409de0a2b5ce798de502545c6721e346e
|
||||
GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
|
||||
|
||||
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
|
||||
|
||||
CPPLLAMA_VERSION?=9d02956443e5c1ded29b7b5ed8a21bc01ba6f563
|
||||
|
||||
# gpt4all version
|
||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
|
||||
@@ -28,22 +30,21 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
|
||||
# go-piper version
|
||||
PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7
|
||||
|
||||
# go-bloomz version
|
||||
BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f
|
||||
|
||||
# stablediffusion version
|
||||
STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
|
||||
|
||||
# Go-ggllm
|
||||
GOGGLLM_VERSION?=862477d16eefb0805261c19c9b0d053e3b2b684b
|
||||
|
||||
export BUILD_TYPE?=
|
||||
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||
export CMAKE_ARGS?=
|
||||
CGO_LDFLAGS?=
|
||||
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
||||
GO_TAGS?=
|
||||
BUILD_ID?=git
|
||||
|
||||
TEST_DIR=/tmp/test
|
||||
|
||||
RANDOM := $(shell bash -c 'echo $$RANDOM')
|
||||
|
||||
VERSION?=$(shell git describe --always --tags || echo "dev" )
|
||||
# go tool nm ./local-ai | grep Commit
|
||||
LD_FLAGS?=
|
||||
@@ -61,6 +62,9 @@ WHITE := $(shell tput -Txterm setaf 7)
|
||||
CYAN := $(shell tput -Txterm setaf 6)
|
||||
RESET := $(shell tput -Txterm sgr0)
|
||||
|
||||
# Default Docker bridge IP
|
||||
E2E_BRIDGE_IP?=172.17.0.1
|
||||
|
||||
ifndef UNAME_S
|
||||
UNAME_S := $(shell uname -s)
|
||||
endif
|
||||
@@ -82,6 +86,18 @@ ifeq ($(BUILD_TYPE),cublas)
|
||||
export LLAMA_CUBLAS=1
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),hipblas)
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
# Llama-stable has no hipblas support, so override it here.
|
||||
export STABLE_BUILD_TYPE=
|
||||
GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
|
||||
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
||||
export LLAMA_METAL=1
|
||||
@@ -107,7 +123,13 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||
OPTIONAL_GRPC+=backend-assets/grpc/piper
|
||||
endif
|
||||
|
||||
GRPC_BACKENDS?=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
|
||||
ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
|
||||
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
|
||||
|
||||
# If empty, then we build all
|
||||
ifeq ($(GRPC_BACKENDS),)
|
||||
GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
|
||||
endif
|
||||
|
||||
.PHONY: all test build vendor
|
||||
|
||||
@@ -118,14 +140,6 @@ gpt4all:
|
||||
git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
|
||||
cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
## go-ggllm
|
||||
go-ggllm:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-ggllm.cpp go-ggllm
|
||||
cd go-ggllm && git checkout -b build $(GOGGLLM_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
go-ggllm/libggllm.a: go-ggllm
|
||||
$(MAKE) -C go-ggllm BUILD_TYPE=$(BUILD_TYPE) libggllm.a
|
||||
|
||||
## go-piper
|
||||
go-piper:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-piper go-piper
|
||||
@@ -152,14 +166,6 @@ go-rwkv:
|
||||
go-rwkv/librwkv.a: go-rwkv
|
||||
cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
||||
|
||||
## bloomz
|
||||
bloomz:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
|
||||
cd bloomz && git checkout -b build $(BLOOMZ_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
bloomz/libbloomz.a: bloomz
|
||||
cd bloomz && make libbloomz.a
|
||||
|
||||
go-bert/libgobert.a: go-bert
|
||||
$(MAKE) -C go-bert libgobert.a
|
||||
|
||||
@@ -208,12 +214,12 @@ go-llama/libbinding.a: go-llama
|
||||
$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
|
||||
|
||||
go-llama-stable/libbinding.a: go-llama-stable
|
||||
$(MAKE) -C go-llama-stable BUILD_TYPE=$(BUILD_TYPE) libbinding.a
|
||||
$(MAKE) -C go-llama-stable BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||
|
||||
go-piper/libpiper_binding.a:
|
||||
go-piper/libpiper_binding.a: go-piper
|
||||
$(MAKE) -C go-piper libpiper_binding.a example/main
|
||||
|
||||
get-sources: go-llama go-llama-stable go-ggllm go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion
|
||||
get-sources: go-llama go-llama-stable go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert go-stable-diffusion
|
||||
touch $@
|
||||
|
||||
replace:
|
||||
@@ -222,10 +228,8 @@ replace:
|
||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-ggllm.cpp=$(shell pwd)/go-ggllm
|
||||
|
||||
prepare-sources: get-sources replace
|
||||
$(GOCMD) mod download
|
||||
@@ -241,9 +245,7 @@ rebuild: ## Rebuilds the project
|
||||
$(MAKE) -C whisper.cpp clean
|
||||
$(MAKE) -C go-stable-diffusion clean
|
||||
$(MAKE) -C go-bert clean
|
||||
$(MAKE) -C bloomz clean
|
||||
$(MAKE) -C go-piper clean
|
||||
$(MAKE) -C go-ggllm clean
|
||||
$(MAKE) build
|
||||
|
||||
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
||||
@@ -261,12 +263,11 @@ clean: ## Remove build related file
|
||||
rm -rf ./backend-assets
|
||||
rm -rf ./go-rwkv
|
||||
rm -rf ./go-bert
|
||||
rm -rf ./bloomz
|
||||
rm -rf ./whisper.cpp
|
||||
rm -rf ./go-piper
|
||||
rm -rf ./go-ggllm
|
||||
rm -rf $(BINARY_NAME)
|
||||
rm -rf release/
|
||||
$(MAKE) -C backend/cpp/llama clean
|
||||
|
||||
## Build:
|
||||
|
||||
@@ -313,6 +314,26 @@ test: prepare test-models/testmodel grpcs
|
||||
$(MAKE) test-tts
|
||||
$(MAKE) test-stablediffusion
|
||||
|
||||
prepare-e2e:
|
||||
mkdir -p $(TEST_DIR)
|
||||
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
|
||||
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
|
||||
docker build --build-arg BUILD_GRPC=true --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
|
||||
|
||||
run-e2e-image:
|
||||
ls -liah $(abspath ./tests/e2e-fixtures)
|
||||
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
||||
|
||||
test-e2e:
|
||||
@echo 'Running e2e tests'
|
||||
BUILD_TYPE=$(BUILD_TYPE) \
|
||||
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
|
||||
|
||||
teardown-e2e:
|
||||
rm -rf $(TEST_DIR) || true
|
||||
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||
|
||||
test-gpt4all: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg
|
||||
@@ -369,10 +390,6 @@ protogen-python:
|
||||
backend-assets/grpc:
|
||||
mkdir -p backend-assets/grpc
|
||||
|
||||
backend-assets/grpc/falcon: backend-assets/grpc go-ggllm/libggllm.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggllm LIBRARY_PATH=$(shell pwd)/go-ggllm \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon ./cmd/grpc/falcon/
|
||||
|
||||
backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \
|
||||
@@ -382,6 +399,16 @@ ifeq ($(BUILD_TYPE),metal)
|
||||
cp go-llama/build/bin/ggml-metal.metal backend-assets/grpc/
|
||||
endif
|
||||
|
||||
backend/cpp/llama/grpc-server:
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
|
||||
|
||||
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
|
||||
cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
|
||||
# TODO: every binary should have its own folder instead, so each can have different metal implementations
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
cp backend/cpp/llama/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
|
||||
endif
|
||||
|
||||
backend-assets/grpc/llama-stable: backend-assets/grpc go-llama-stable/libbinding.a
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama-stable
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-stable LIBRARY_PATH=$(shell pwd)/go-llama \
|
||||
@@ -427,10 +454,6 @@ backend-assets/grpc/rwkv: backend-assets/grpc go-rwkv/librwkv.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-rwkv LIBRARY_PATH=$(shell pwd)/go-rwkv \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./cmd/grpc/rwkv/
|
||||
|
||||
backend-assets/grpc/bloomz: backend-assets/grpc bloomz/libbloomz.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/bloomz LIBRARY_PATH=$(shell pwd)/bloomz \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bloomz ./cmd/grpc/bloomz/
|
||||
|
||||
backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-bert LIBRARY_PATH=$(shell pwd)/go-bert \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./cmd/grpc/bert-embeddings/
|
||||
@@ -438,9 +461,12 @@ backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
|
||||
backend-assets/grpc/langchain-huggingface: backend-assets/grpc
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./cmd/grpc/langchain-huggingface/
|
||||
|
||||
backend-assets/grpc/stablediffusion: backend-assets/grpc go-stable-diffusion/libstablediffusion.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/go-stable-diffusion/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./cmd/grpc/stablediffusion/
|
||||
backend-assets/grpc/stablediffusion: backend-assets/grpc
|
||||
if [ ! -f backend-assets/grpc/stablediffusion ]; then \
|
||||
$(MAKE) go-stable-diffusion/libstablediffusion.a; \
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/go-stable-diffusion/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./cmd/grpc/stablediffusion/; \
|
||||
fi
|
||||
|
||||
backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data go-piper/libpiper_binding.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(shell pwd)/go-piper \
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"github.com/go-skynet/LocalAI/api/options"
|
||||
"github.com/go-skynet/LocalAI/api/schema"
|
||||
"github.com/go-skynet/LocalAI/internal"
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/go-skynet/LocalAI/pkg/assets"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
@@ -120,6 +121,9 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
||||
|
||||
// Default middleware config
|
||||
app.Use(recover.New())
|
||||
if options.Metrics != nil {
|
||||
app.Use(metrics.APIMiddleware(options.Metrics))
|
||||
}
|
||||
|
||||
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
|
||||
auth := func(c *fiber.Ctx) error {
|
||||
@@ -229,5 +233,7 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
||||
app.Get("/v1/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
|
||||
app.Get("/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
|
||||
|
||||
app.Get("/metrics", metrics.MetricsHandler())
|
||||
|
||||
return app, nil
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
|
||||
. "github.com/go-skynet/LocalAI/api"
|
||||
"github.com/go-skynet/LocalAI/api/options"
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
@@ -162,8 +163,12 @@ var _ = Describe("API test", func() {
|
||||
},
|
||||
}
|
||||
|
||||
metricsService, err := metrics.SetupMetrics()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
app, err = App(
|
||||
append(commonOpts,
|
||||
options.WithMetrics(metricsService),
|
||||
options.WithContext(c),
|
||||
options.WithGalleries(galleries),
|
||||
options.WithModelLoader(modelLoader), options.WithBackendAssets(backendAssets), options.WithBackendAssetsOutput(tmpdir))...)
|
||||
@@ -479,9 +484,13 @@ var _ = Describe("API test", func() {
|
||||
},
|
||||
}
|
||||
|
||||
metricsService, err := metrics.SetupMetrics()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
app, err = App(
|
||||
append(commonOpts,
|
||||
options.WithContext(c),
|
||||
options.WithMetrics(metricsService),
|
||||
options.WithAudioDir(tmpdir),
|
||||
options.WithImageDir(tmpdir),
|
||||
options.WithGalleries(galleries),
|
||||
@@ -583,12 +592,15 @@ var _ = Describe("API test", func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
c, cancel = context.WithCancel(context.Background())
|
||||
|
||||
var err error
|
||||
metricsService, err := metrics.SetupMetrics()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
app, err = App(
|
||||
append(commonOpts,
|
||||
options.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
|
||||
options.WithContext(c),
|
||||
options.WithModelLoader(modelLoader),
|
||||
options.WithMetrics(metricsService),
|
||||
)...)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
@@ -792,10 +804,13 @@ var _ = Describe("API test", func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
c, cancel = context.WithCancel(context.Background())
|
||||
|
||||
var err error
|
||||
metricsService, err := metrics.SetupMetrics()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
app, err = App(
|
||||
append(commonOpts,
|
||||
options.WithContext(c),
|
||||
options.WithMetrics(metricsService),
|
||||
options.WithModelLoader(modelLoader),
|
||||
options.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
|
||||
)
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/api/backend"
|
||||
config "github.com/go-skynet/LocalAI/api/config"
|
||||
@@ -15,15 +16,20 @@ import (
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/valyala/fasthttp"
|
||||
)
|
||||
|
||||
func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
||||
emptyMessage := ""
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
|
||||
process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||
initialMessage := schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
|
||||
Object: "chat.completion.chunk",
|
||||
@@ -32,6 +38,8 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
|
||||
ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
||||
resp := schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
|
||||
Object: "chat.completion.chunk",
|
||||
@@ -261,7 +269,9 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
}
|
||||
|
||||
resp := &schema.OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{
|
||||
{
|
||||
FinishReason: "stop",
|
||||
@@ -355,6 +365,8 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
}
|
||||
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "chat.completion",
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/api/backend"
|
||||
config "github.com/go-skynet/LocalAI/api/config"
|
||||
@@ -13,16 +14,22 @@ import (
|
||||
"github.com/go-skynet/LocalAI/api/schema"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/valyala/fasthttp"
|
||||
)
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/completions
|
||||
func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
|
||||
process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||
ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
||||
resp := schema.OpenAIResponse{
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{
|
||||
{
|
||||
Index: 0,
|
||||
@@ -108,7 +115,9 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
||||
}
|
||||
|
||||
resp := &schema.OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{
|
||||
{
|
||||
Index: 0,
|
||||
@@ -156,6 +165,8 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
||||
}
|
||||
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "text_completion",
|
||||
|
||||
@@ -3,6 +3,7 @@ package openai
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/api/backend"
|
||||
config "github.com/go-skynet/LocalAI/api/config"
|
||||
@@ -10,6 +11,7 @@ import (
|
||||
"github.com/go-skynet/LocalAI/api/schema"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
@@ -62,7 +64,11 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
result = append(result, r...)
|
||||
}
|
||||
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "edit",
|
||||
|
||||
@@ -3,10 +3,12 @@ package openai
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/api/backend"
|
||||
config "github.com/go-skynet/LocalAI/api/config"
|
||||
"github.com/go-skynet/LocalAI/api/schema"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/go-skynet/LocalAI/api/options"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
@@ -57,10 +59,14 @@ func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
||||
items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
|
||||
}
|
||||
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
resp := &schema.OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Data: items,
|
||||
Object: "list",
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Data: items,
|
||||
Object: "list",
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
|
||||
@@ -5,11 +5,14 @@ import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/go-skynet/LocalAI/api/schema"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/api/schema"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/go-skynet/LocalAI/api/backend"
|
||||
config "github.com/go-skynet/LocalAI/api/config"
|
||||
@@ -174,8 +177,12 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
|
||||
}
|
||||
}
|
||||
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
resp := &schema.OpenAIResponse{
|
||||
Data: result,
|
||||
ID: id,
|
||||
Created: created,
|
||||
Data: result,
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
@@ -24,6 +25,7 @@ type Option struct {
|
||||
PreloadModelsFromPath string
|
||||
CORSAllowOrigins string
|
||||
ApiKeys []string
|
||||
Metrics *metrics.Metrics
|
||||
|
||||
Galleries []gallery.Gallery
|
||||
|
||||
@@ -198,3 +200,9 @@ func WithApiKeys(apiKeys []string) AppOption {
|
||||
o.ApiKeys = apiKeys
|
||||
}
|
||||
}
|
||||
|
||||
// WithMetrics returns an AppOption that stores meter in the Metrics field
// of the Option it is applied to.
func WithMetrics(meter *metrics.Metrics) AppOption {
	return func(o *Option) {
		o.Metrics = meter
	}
}
|
||||
|
||||
61
backend/cpp/llama/CMakeLists.txt
Normal file
61
backend/cpp/llama/CMakeLists.txt
Normal file
@@ -0,0 +1,61 @@
|
||||
# Build script for the llama.cpp gRPC backend server target.
# NOTE(review): this file references the `common` and `llama` targets without
# defining them, so it presumably is meant to be added as a subdirectory of the
# llama.cpp source tree - confirm against the accompanying Makefile.
set(CMAKE_CXX_STANDARD 17)
cmake_minimum_required(VERSION 3.15)
set(TARGET grpc-server)
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)

# On macOS, add the /opt/homebrew library and include directories.
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
    link_directories("/opt/homebrew/lib")
    include_directories("/opt/homebrew/include")
endif()

# abseil, protobuf and gRPC must be discoverable through their CMake config packages.
find_package(absl CONFIG REQUIRED)
find_package(Protobuf CONFIG REQUIRED)
find_package(gRPC CONFIG REQUIRED)

# Code generators: protoc and the gRPC C++ plugin are looked up on the host.
# NOTE(review): neither find_program result is checked; a missing tool would only
# surface when the custom command below runs.
find_program(_PROTOBUF_PROTOC protoc)
set(_GRPC_GRPCPP grpc++)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)

# The generated headers are written to the binary dir, so it must be on the include path.
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${Protobuf_INCLUDE_DIRS})

message(STATUS "Using protobuf version ${Protobuf_VERSION} | Protobuf_INCLUDE_DIRS: ${Protobuf_INCLUDE_DIRS} | CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}")

# Proto file
# NOTE(review): the relative path climbs six directories; presumably it reaches the
# repository's pkg/grpc/proto/backend.proto once this file has been copied to
# llama.cpp/examples/grpc-server - confirm.
get_filename_component(hw_proto "../../../../../../pkg/grpc/proto/backend.proto" ABSOLUTE)
get_filename_component(hw_proto_path "${hw_proto}" PATH)

# Generated sources
set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.cc")
set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.h")
set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.cc")
set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.h")

# Run protoc with the gRPC plugin to produce the four generated files above;
# the command re-runs whenever the .proto file changes.
add_custom_command(
    OUTPUT "${hw_proto_srcs}" "${hw_proto_hdrs}" "${hw_grpc_srcs}" "${hw_grpc_hdrs}"
    COMMAND ${_PROTOBUF_PROTOC}
    ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
        --cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
        -I "${hw_proto_path}"
        --plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}"
        "${hw_proto}"
    DEPENDS "${hw_proto}")

# hw_grpc_proto
# Library holding the generated protobuf/gRPC sources.
add_library(hw_grpc_proto
    ${hw_grpc_srcs}
    ${hw_grpc_hdrs}
    ${hw_proto_srcs}
    ${hw_proto_hdrs})

# The server executable, linked against the llama.cpp targets, abseil flags,
# the generated proto library, gRPC (with reflection) and protobuf.
add_executable(${TARGET} grpc-server.cpp)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
    absl::flags_parse
    gRPC::${_REFLECTION}
    gRPC::${_GRPC_GRPCPP}
    protobuf::${_PROTOBUF_LIBPROTOBUF})
# NOTE(review): requests only cxx_std_11 although CMAKE_CXX_STANDARD is set to 17 above.
target_compile_features(${TARGET} PRIVATE cxx_std_11)
# Only depend on BUILD_INFO when such a target exists in the enclosing project.
if(TARGET BUILD_INFO)
    add_dependencies(${TARGET} BUILD_INFO)
endif()
|
||||
44
backend/cpp/llama/Makefile
Normal file
44
backend/cpp/llama/Makefile
Normal file
@@ -0,0 +1,44 @@
|
||||
|
||||
# Builds the llama.cpp gRPC backend server (grpc-server).
#
# LLAMA_VERSION  git ref of llama.cpp to check out (empty = default branch HEAD)
# CMAKE_ARGS     extra arguments forwarded to cmake
# BUILD_TYPE     optional acceleration backend: cublas, openblas, clblast, hipblas
LLAMA_VERSION?=

CMAKE_ARGS?=
BUILD_TYPE?=

# `rebuild` and `clean` never produce a file with their own name; declaring them
# phony keeps them runnable even if a file called `rebuild` or `clean` exists.
.PHONY: rebuild clean

# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# If build type is clblast (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
# NOTE(review): /some/path looks like a placeholder - confirm the intended CLBlast location.
else ifeq ($(BUILD_TYPE),clblast)
CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
endif

# Fetch llama.cpp and pin it to LLAMA_VERSION on a local `build` branch.
llama.cpp:
	git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
	cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1

# Install the gRPC server sources as a llama.cpp example and register it with
# the examples CMakeLists.txt.
llama.cpp/examples/grpc-server:
	mkdir -p llama.cpp/examples/grpc-server
	cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
	cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
	echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt

# Refresh the copied sources and rebuild the binary without re-cloning.
rebuild:
	cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
	cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
	rm -rf grpc-server
	$(MAKE) grpc-server

# Remove the llama.cpp checkout and the built binary.
clean:
	rm -rf llama.cpp
	rm -rf grpc-server

# Configure and build llama.cpp (including the grpc-server example), then copy
# the resulting binary next to this Makefile.
grpc-server: llama.cpp llama.cpp/examples/grpc-server
	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
	cp llama.cpp/build/bin/grpc-server .
|
||||
926
backend/cpp/llama/grpc-server.cpp
Normal file
926
backend/cpp/llama/grpc-server.cpp
Normal file
@@ -0,0 +1,926 @@
|
||||
// llama.cpp gRPC C++ backend server
|
||||
//
|
||||
// Ettore Di Giacinto <mudler@localai.io>
|
||||
//
|
||||
// This is a gRPC server for llama.cpp compatible with the LocalAI proto
|
||||
// Note: this is a re-adaptation of the original llama.cpp example/server.cpp for HTTP,
|
||||
// but modified to work with gRPC
|
||||
//
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <getopt.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "llama.h"
|
||||
#include "grammar-parser.h"
|
||||
#include "backend.pb.h"
|
||||
#include "backend.grpc.pb.h"
|
||||
|
||||
// include std::regex
|
||||
#include <regex>
|
||||
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
||||
#include <grpcpp/grpcpp.h>
|
||||
#include <grpcpp/health_check_service_interface.h>
|
||||
|
||||
using grpc::Server;
|
||||
using grpc::ServerBuilder;
|
||||
using grpc::ServerContext;
|
||||
using grpc::Status;
|
||||
|
||||
|
||||
using backend::HealthMessage;
|
||||
|
||||
|
||||
// completion token output with probabilities
struct completion_token_output
{
    // One candidate token together with its probability value.
    struct token_prob
    {
        llama_token tok;
        float prob;
    };

    // Candidate tokens recorded alongside the chosen token.
    std::vector<token_prob> probs;
    // The chosen token.
    llama_token tok;
};
|
||||
|
||||
static size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b)
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i < a.size() && i < b.size() && a[i] == b[i]; i++)
|
||||
{
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
// Kind of stop-string match to look for in generated text.
enum stop_type
{
    STOP_FULL,    // the whole stop string
    STOP_PARTIAL, // a leading part of a stop string at the end of the text
};
|
||||
|
||||
// Returns true when `str` ends with `suffix`. An empty suffix always matches.
static bool ends_with(const std::string &str, const std::string &suffix)
{
    if (suffix.size() > str.size())
    {
        return false;
    }
    const size_t offset = str.size() - suffix.size();
    return str.compare(offset, suffix.size(), suffix) == 0;
}
|
||||
|
||||
// Looks for the longest prefix of `stop` that `text` ends with.
//
// Returns the position in `text` where that prefix starts, or
// std::string::npos when either string is empty or no prefix of `stop`
// terminates `text`.
static size_t find_partial_stop_string(const std::string &stop,
                                       const std::string &text)
{
    if (text.empty() || stop.empty())
    {
        return std::string::npos;
    }

    const char tail = text.back();
    // Walk candidate prefix lengths from longest to shortest.
    for (size_t len = stop.size(); len > 0; --len)
    {
        // Cheap filter: the prefix can only match if its last char equals the text's last char.
        if (stop[len - 1] != tail)
        {
            continue;
        }
        if (text.size() >= len && text.compare(text.size() - len, len, stop, 0, len) == 0)
        {
            return text.size() - len;
        }
    }
    return std::string::npos;
}
|
||||
|
||||
|
||||
template <class Iter>
|
||||
static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
|
||||
{
|
||||
std::string ret;
|
||||
for (; begin != end; ++begin)
|
||||
{
|
||||
ret += llama_token_to_piece(ctx, *begin);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
// format incomplete utf-8 multibyte character for output
|
||||
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
|
||||
{
|
||||
std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
|
||||
// if the size is 1 and first bit is 1, meaning it's a partial character
|
||||
// (size > 1 meaning it's already a known token)
|
||||
if (out.size() == 1 && (out[0] & 0x80) == 0x80)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << std::hex << (out[0] & 0xff);
|
||||
std::string res(ss.str());
|
||||
out = "byte: \\x" + res;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
struct llama_server_context
|
||||
{
|
||||
bool stream = false;
|
||||
bool has_next_token = false;
|
||||
std::string generated_text;
|
||||
std::vector<completion_token_output> generated_token_probs;
|
||||
|
||||
size_t num_prompt_tokens = 0;
|
||||
size_t num_tokens_predicted = 0;
|
||||
size_t n_past = 0;
|
||||
size_t n_remain = 0;
|
||||
|
||||
std::vector<llama_token> embd;
|
||||
|
||||
gpt_params params;
|
||||
|
||||
llama_model *model = nullptr;
|
||||
llama_context *ctx = nullptr;
|
||||
llama_sampling_context *ctx_sampling = nullptr;
|
||||
|
||||
int n_ctx;
|
||||
|
||||
bool truncated = false;
|
||||
bool stopped_eos = false;
|
||||
bool stopped_word = false;
|
||||
bool stopped_limit = false;
|
||||
std::string stopping_word;
|
||||
int32_t multibyte_pending = 0;
|
||||
|
||||
std::mutex mutex;
|
||||
|
||||
std::unique_lock<std::mutex> lock()
|
||||
{
|
||||
return std::unique_lock<std::mutex>(mutex);
|
||||
}
|
||||
|
||||
// Releases the sampling context, the llama context and the model (in that
// order) and nulls the pointers afterwards.
~llama_server_context()
{
    // The sampling context is created in initSampling() and was previously
    // never released on destruction.
    if (ctx_sampling)
    {
        llama_sampling_free(ctx_sampling);
        ctx_sampling = nullptr;
    }
    if (ctx)
    {
        llama_free(ctx);
        ctx = nullptr;
    }
    if (model)
    {
        llama_free_model(model);
        model = nullptr;
    }
}
|
||||
|
||||
void rewind()
|
||||
{
|
||||
params.antiprompt.clear();
|
||||
params.sparams.grammar.clear();
|
||||
num_prompt_tokens = 0;
|
||||
num_tokens_predicted = 0;
|
||||
generated_text = "";
|
||||
generated_text.reserve(n_ctx);
|
||||
generated_token_probs.clear();
|
||||
truncated = false;
|
||||
stopped_eos = false;
|
||||
stopped_word = false;
|
||||
stopped_limit = false;
|
||||
stopping_word = "";
|
||||
multibyte_pending = 0;
|
||||
n_remain = 0;
|
||||
n_past = 0;
|
||||
params.sparams.n_prev = n_ctx;
|
||||
}
|
||||
|
||||
void initSampling() {
|
||||
if (ctx_sampling != nullptr) {
|
||||
llama_sampling_free(ctx_sampling);
|
||||
}
|
||||
ctx_sampling = llama_sampling_init(params.sparams);
|
||||
}
|
||||
|
||||
bool loadModel(const gpt_params ¶ms_)
|
||||
{
|
||||
params = params_;
|
||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
||||
if (model == nullptr)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
n_ctx = llama_n_ctx(ctx);
|
||||
return true;
|
||||
}
|
||||
// Tokenizes the NUL-terminated `prompt` with this context's tokenizer.
// `add_bos` is forwarded unchanged to llama_tokenize.
std::vector<llama_token> tokenize_string(const char *prompt, bool add_bos) const {
    const std::string text(prompt);
    return ::llama_tokenize(ctx, text, add_bos);
}
|
||||
std::vector<llama_token> tokenize_array(const char **prompts, bool add_bos) const {
|
||||
std::vector<llama_token> prompt_tokens;
|
||||
|
||||
bool first = true;
|
||||
bool is_string = true;
|
||||
for (const char **p = prompts; *p != nullptr; ++p)
|
||||
{
|
||||
if (is_string)
|
||||
{
|
||||
auto s = std::string(*p);
|
||||
std::vector<llama_token> p;
|
||||
if (first)
|
||||
{
|
||||
p = ::llama_tokenize(ctx, s, add_bos);
|
||||
first = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
p = ::llama_tokenize(ctx, s, false);
|
||||
}
|
||||
prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (first)
|
||||
{
|
||||
first = false;
|
||||
}
|
||||
//prompt_tokens.push_back(p.template get<llama_token>());
|
||||
}
|
||||
}
|
||||
return prompt_tokens;
|
||||
}
|
||||
|
||||
void truncatePrompt(std::vector<llama_token> &prompt_tokens) {
|
||||
const int n_left = n_ctx - params.n_keep;
|
||||
const int n_block_size = n_left / 2;
|
||||
const int erased_blocks = (prompt_tokens.size() - params.n_keep - n_block_size) / n_block_size;
|
||||
|
||||
// Keep n_keep tokens at start of prompt (at most n_ctx - 4)
|
||||
std::vector<llama_token> new_tokens(prompt_tokens.begin(), prompt_tokens.begin() + params.n_keep);
|
||||
|
||||
new_tokens.insert(new_tokens.end(), prompt_tokens.begin() + params.n_keep + erased_blocks * n_block_size, prompt_tokens.end());
|
||||
|
||||
truncated = true;
|
||||
prompt_tokens = new_tokens;
|
||||
}
|
||||
|
||||
void loadInfill()
|
||||
{
|
||||
bool suff_rm_leading_spc = true;
|
||||
if (params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
|
||||
params.input_suffix.erase(0, 1);
|
||||
suff_rm_leading_spc = false;
|
||||
}
|
||||
|
||||
auto prefix_tokens = tokenize_string(params.input_prefix.c_str(), false);
|
||||
auto suffix_tokens = tokenize_string(params.input_suffix.c_str(), false);
|
||||
const int space_token = 29871;
|
||||
if (suff_rm_leading_spc && suffix_tokens[0] == space_token) {
|
||||
suffix_tokens.erase(suffix_tokens.begin());
|
||||
}
|
||||
prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model));
|
||||
prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS
|
||||
prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model));
|
||||
prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
|
||||
prefix_tokens.push_back(llama_token_middle(model));
|
||||
|
||||
auto prompt_tokens = prefix_tokens;
|
||||
|
||||
num_prompt_tokens = prompt_tokens.size();
|
||||
|
||||
if (params.n_keep < 0)
|
||||
{
|
||||
params.n_keep = (int)num_prompt_tokens;
|
||||
}
|
||||
params.n_keep = std::min(params.n_ctx - 4, params.n_keep);
|
||||
|
||||
// if input prompt is too big, truncate like normal
|
||||
if (num_prompt_tokens >= (size_t) n_ctx)
|
||||
{
|
||||
truncatePrompt(prompt_tokens);
|
||||
num_prompt_tokens = prompt_tokens.size();
|
||||
|
||||
GGML_ASSERT(num_prompt_tokens < (size_t)n_ctx);
|
||||
}
|
||||
|
||||
// push the prompt into the sampling context (do not apply grammar)
|
||||
for (auto & token : prompt_tokens)
|
||||
{
|
||||
llama_sampling_accept(ctx_sampling, ctx, token, false);
|
||||
}
|
||||
|
||||
// compare the evaluated prompt with the new prompt
|
||||
n_past = common_part(embd, prompt_tokens);
|
||||
embd = prompt_tokens;
|
||||
|
||||
if (n_past == num_prompt_tokens)
|
||||
{
|
||||
// we have to evaluate at least 1 token to generate logits.
|
||||
printf("we have to evaluate at least 1 token to generate logits\n");
|
||||
n_past--;
|
||||
}
|
||||
|
||||
// since #3228 we now have to manually manage the KV cache
|
||||
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
|
||||
|
||||
has_next_token = true;
|
||||
}
|
||||
// Tokenizes `prompt` and prepares the context for generation.
//
// The prompt is truncated when it does not fit the context, pushed into the
// sampling context, and compared with the previously evaluated tokens
// (`embd`) so that only the part after the common prefix is evaluated again.
void loadPrompt(std::string prompt)
{
    auto prompt_tokens = tokenize_string(prompt.c_str(), true); // always add BOS

    num_prompt_tokens = prompt_tokens.size();

    // A negative n_keep means "keep the whole prompt"; it is then clamped to n_ctx - 4.
    if (params.n_keep < 0)
    {
        params.n_keep = (int)num_prompt_tokens;
    }
    params.n_keep = std::min(n_ctx - 4, params.n_keep);

    // if input prompt is too big, truncate like normal
    if (num_prompt_tokens >= (size_t) n_ctx)
    {
        truncatePrompt(prompt_tokens);
        num_prompt_tokens = prompt_tokens.size();

        GGML_ASSERT(num_prompt_tokens < (size_t)n_ctx);
    }

    // push the prompt into the sampling context (do not apply grammar)
    for (auto & token : prompt_tokens)
    {
        llama_sampling_accept(ctx_sampling, ctx, token, false);
    }

    // compare the evaluated prompt with the new prompt
    n_past = common_part(embd, prompt_tokens);

    embd = prompt_tokens;
    if (n_past == num_prompt_tokens)
    {
        // we have to evaluate at least 1 token to generate logits.
        // NOTE(review): n_past is a size_t; this would wrap if the prompt produced zero tokens - confirm that cannot happen.
        n_past--;
    }

    // since #3228 we now have to manually manage the KV cache
    llama_kv_cache_seq_rm(ctx, 0, n_past, -1);

    has_next_token = true;
}
|
||||
|
||||
// Starts a new completion: resets the remaining-token budget and seeds the RNG.
void beginCompletion()
{
    // budget of tokens still to generate, taken from params.n_predict
    // NOTE(review): n_remain is a size_t, so a negative n_predict wraps to a large value here.
    n_remain = params.n_predict;
    llama_set_rng_seed(ctx, params.seed);
}
|
||||
|
||||
// Evaluates any pending tokens in `embd`, samples one new token and appends
// it to `embd`.
//
// Returns the sampled token together with up to params.sparams.n_probs
// candidate probabilities. The returned token is -1 when decoding fails, and
// the EOS token when params.n_predict is 0. Updates `has_next_token` and the
// stop flags as a side effect.
completion_token_output nextToken()
{
    completion_token_output result;
    result.tok = -1;

    if (embd.size() >= (size_t)n_ctx)
    {
        // Shift context
        // Everything after the first n_keep + 1 tokens is a candidate for
        // removal; half of it is discarded and the rest is moved down.

        const int n_left = n_past - params.n_keep - 1;
        const int n_discard = n_left/2;

        llama_kv_cache_seq_rm (ctx, 0, params.n_keep + 1 , params.n_keep + n_discard + 1);
        llama_kv_cache_seq_shift(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard);

        // Mirror the same shift in the local token buffer.
        for (size_t i = params.n_keep + 1 + n_discard; i < embd.size(); i++)
        {
            embd[i - n_discard] = embd[i];
        }
        embd.resize(embd.size() - n_discard);

        n_past -= n_discard;

        truncated = true;
    }

    // tg is true when the last evaluated batch held exactly one token
    // (or when nothing needed evaluating at all).
    bool tg = true;
    while (n_past < embd.size())
    {
        int n_eval = (int)embd.size() - n_past;
        tg = n_eval == 1;
        // Evaluate at most n_batch tokens per decode call.
        if (n_eval > params.n_batch)
        {
            n_eval = params.n_batch;
        }

        if (llama_decode(ctx, llama_batch_get_one(&embd[n_past], n_eval, n_past, 0)))
        {
            // Decoding failed: stop generation and return the -1 token.
            has_next_token = false;
            return result;
        }
        n_past += n_eval;
    }

    // Nothing to predict: report EOS without sampling.
    if (params.n_predict == 0)
    {
        has_next_token = false;
        result.tok = llama_token_eos(model);
        return result;
    }

    {
        // out of user input, sample next token
        result.tok = llama_sampling_sample(ctx_sampling, ctx, NULL);

        llama_token_data_array cur_p = { ctx_sampling->cur.data(), ctx_sampling->cur.size(), false };

        const int32_t n_probs = params.sparams.n_probs;
        if (params.sparams.temp <= 0 && n_probs > 0)
        {
            // For llama_sample_token_greedy we need to sort candidates
            llama_sample_softmax(ctx, &cur_p);
        }

        // Record the first n_probs candidates (fewer if not that many exist).
        for (size_t i = 0; i < std::min(cur_p.size, (size_t)n_probs); ++i)
        {
            result.probs.push_back({cur_p.data[i].id, cur_p.data[i].p});
        }

        llama_sampling_accept(ctx_sampling, ctx, result.tok, true);

        if (tg) {
            num_tokens_predicted++;
        }
    }

    // add it to the context
    embd.push_back(result.tok);
    // decrement remaining sampling budget
    --n_remain;

    if (!embd.empty() && embd.back() == llama_token_eos(model))
    {
        // stopping_word = llama_token_to_piece(ctx, embd.back());
        has_next_token = false;
        stopped_eos = true;
        return result;
    }

    // Continue while the budget is not exhausted; n_predict == -1 means no limit.
    has_next_token = params.n_predict == -1 || n_remain != 0;
    return result;
}
|
||||
|
||||
size_t findStoppingStrings(const std::string &text, const size_t last_token_size,
|
||||
const stop_type type)
|
||||
{
|
||||
size_t stop_pos = std::string::npos;
|
||||
for (const std::string &word : params.antiprompt)
|
||||
{
|
||||
size_t pos;
|
||||
if (type == STOP_FULL)
|
||||
{
|
||||
const size_t tmp = word.size() + last_token_size;
|
||||
const size_t from_pos = text.size() > tmp ? text.size() - tmp : 0;
|
||||
pos = text.find(word, from_pos);
|
||||
}
|
||||
else
|
||||
{
|
||||
pos = find_partial_stop_string(word, text);
|
||||
}
|
||||
if (pos != std::string::npos &&
|
||||
(stop_pos == std::string::npos || pos < stop_pos))
|
||||
{
|
||||
if (type == STOP_FULL)
|
||||
{
|
||||
stopping_word = word;
|
||||
stopped_word = true;
|
||||
has_next_token = false;
|
||||
}
|
||||
stop_pos = pos;
|
||||
}
|
||||
}
|
||||
return stop_pos;
|
||||
}
|
||||
|
||||
// Produces one token via nextToken(), appends its text to generated_text and updates
// the multi-byte and limit bookkeeping. Returns the token (with its probabilities).
completion_token_output doCompletion()
{
    auto sampled = nextToken();

    // A token id of -1 contributes no text.
    std::string piece;
    if (sampled.tok != -1)
    {
        piece = llama_token_to_piece(ctx, sampled.tok);
    }
    generated_text += piece;

    if (params.sparams.n_probs > 0)
    {
        generated_token_probs.push_back(sampled);
    }

    if (multibyte_pending > 0)
    {
        // Inside a multi-byte sequence: account for the bytes just produced.
        multibyte_pending -= piece.size();
    }
    else if (piece.size() == 1)
    {
        // A one-byte piece may be the lead byte of a UTF-8 sequence; remember how
        // many continuation bytes are still expected.
        const char lead = piece[0];
        const bool two_byte = (lead & 0xE0) == 0xC0;   // 110xxxxx 10xxxxxx
        const bool three_byte = (lead & 0xF0) == 0xE0; // 1110xxxx 10xxxxxx 10xxxxxx
        const bool four_byte = (lead & 0xF8) == 0xF0;  // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        multibyte_pending = two_byte ? 1 : three_byte ? 2 : four_byte ? 3 : 0;
    }

    // Do not stop in the middle of a multi-byte character: grant one more token.
    if (multibyte_pending > 0 && !has_next_token)
    {
        has_next_token = true;
        n_remain++;
    }

    if (!has_next_token && n_remain == 0)
    {
        stopped_limit = true;
    }

    return sampled;
}
|
||||
|
||||
std::vector<float> getEmbedding()
|
||||
{
|
||||
static const int n_embd = llama_n_embd(model);
|
||||
if (!params.embedding)
|
||||
{
|
||||
return std::vector<float>(n_embd, 0.0f);
|
||||
}
|
||||
const float *data = llama_get_embeddings(ctx);
|
||||
std::vector<float> embedding(data, data + n_embd);
|
||||
return embedding;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Copies the per-request generation options from the gRPC PredictOptions message
// into the server context.
//
//   streaming - stored in llama.stream
//   predict   - request message to read the options from
//   llama     - server context whose params are overwritten
//
// A token budget of 0 in the request is mapped to n_predict = -1. The logit-bias map
// and the stop-string list are rebuilt from scratch on every call.
static void parse_options_completion(bool streaming, const backend::PredictOptions* predict, llama_server_context &llama)
{
    llama.stream = streaming;
    llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens();

    // Sampling parameters.
    auto &sparams = llama.params.sparams;
    sparams.top_k = predict->topk();
    sparams.top_p = predict->topp();
    sparams.tfs_z = predict->tailfreesamplingz();
    sparams.typical_p = predict->typicalp();
    sparams.penalty_last_n = predict->repeat();
    sparams.temp = predict->temperature();
    sparams.penalty_repeat = predict->penalty();
    sparams.penalty_present = predict->presencepenalty();
    sparams.penalty_freq = predict->frequencypenalty();
    sparams.mirostat = predict->mirostat();
    sparams.mirostat_tau = predict->mirostattau();
    sparams.mirostat_eta = predict->mirostateta();
    sparams.penalize_nl = predict->penalizenl();
    llama.params.n_keep = predict->nkeep();
    llama.params.seed = predict->seed();
    sparams.grammar = predict->grammar();
    // llama.params.n_probs = predict->
    llama.params.prompt = predict->prompt();

    sparams.logit_bias.clear();

    if (predict->ignoreeos())
    {
        // Make the end-of-sequence token impossible to sample.
        sparams.logit_bias[llama_token_eos(llama.model)] = -INFINITY;
    }

    // Per-token logit bias from the request is not wired up yet; kept for reference:
    // const auto &logit_bias = body.find("logit_bias");
    // if (logit_bias != body.end() && logit_bias->is_array())
    // {
    //     const int n_vocab = llama_n_vocab(llama.model);
    //     for (const auto &el : *logit_bias)
    //     {
    //         if (el.is_array() && el.size() == 2 && el[0].is_number_integer())
    //         {
    //             llama_token tok = el[0].get<llama_token>();
    //             if (tok >= 0 && tok < n_vocab)
    //             {
    //                 if (el[1].is_number())
    //                 {
    //                     llama.params.logit_bias[tok] = el[1].get<float>();
    //                 }
    //                 else if (el[1].is_boolean() && !el[1].get<bool>())
    //                 {
    //                     llama.params.logit_bias[tok] = -INFINITY;
    //                 }
    //             }
    //         }
    //     }
    // }

    // Stop strings: empty entries are skipped.
    llama.params.antiprompt.clear();
    for (const std::string& stopPrompt : predict->stopprompts()) {
        if (!stopPrompt.empty())
        {
            llama.params.antiprompt.push_back(stopPrompt);
        }
    }
}
|
||||
|
||||
|
||||
|
||||
// Fills `params` with the model-loading options carried by the gRPC ModelOptions
// request (model path, context size, threading, GPU offload, batching, memory flags).
static void params_parse(const backend::ModelOptions* request,
                         gpt_params & params) {

    params.model = request->modelfile();
    // params.model_alias ??
    params.model_alias = request->modelfile();
    params.n_ctx = request->contextsize();
    params.memory_f16 = request->f16memory();
    params.n_threads = request->threads();
    params.n_gpu_layers = request->ngpulayers();
    params.n_batch = request->nbatch();

    if (!request->tensorsplit().empty()) {
        const std::string spec = request->tensorsplit();

        // Entries are separated by ',' and/or '/'.
        const std::regex separators{ R"([,/]+)" };
        const std::sregex_token_iterator first(spec.begin(), spec.end(), separators, -1);
        const std::sregex_token_iterator last;
        const std::vector<std::string> parts(first, last);

        GGML_ASSERT(parts.size() <= LLAMA_MAX_DEVICES);

        // Devices without an explicit entry get a split of 0.
        for (size_t dev = 0; dev < LLAMA_MAX_DEVICES; ++dev) {
            params.tensor_split[dev] = dev < parts.size() ? std::stof(parts[dev]) : 0.0f;
        }
    }

    if (!request->maingpu().empty()) {
        params.main_gpu = std::stoi(request->maingpu());
    }
    // TODO: lora needs also a scale factor
    //params.lora_adapter = request->loraadapter();
    //params.lora_base = request->lorabase();
    params.use_mlock = request->mlock();
    params.use_mmap = request->mmap();
    params.embedding = request->embeddings();
}
|
||||
|
||||
// True when the token sequence is non-empty and its last token is the model's
// end-of-sequence token.
static bool is_at_eob(llama_server_context &server_context, const llama_token *tokens, const size_t n_tokens) {
    if (n_tokens == 0) {
        return false;
    }
    const llama_token last = tokens[n_tokens - 1];
    return last == llama_token_eos(server_context.model);
}
|
||||
|
||||
// Function matching type llama_beam_search_callback_fn_t.
|
||||
// Custom callback example is called each time the beams lengths increase:
|
||||
// * Show progress by printing ',' following by number of convergent beam tokens if any.
|
||||
// * When all beams converge to a common prefix, they are made available in beams_state.beams[0].
|
||||
// This is also called when the stop condition is met.
|
||||
// Collect tokens into std::vector<llama_token> response which is pointed to by callback_data.
|
||||
static void beam_search_callback(void *callback_data, llama_beams_state beams_state) {
|
||||
auto & llama = *static_cast<llama_server_context*>(callback_data);
|
||||
// Mark beams as EOS as needed.
|
||||
for (size_t i = 0 ; i < beams_state.n_beams ; ++i) {
|
||||
llama_beam_view& beam_view = beams_state.beam_views[i];
|
||||
if (!beam_view.eob && is_at_eob(llama, beam_view.tokens, beam_view.n_tokens)) {
|
||||
beam_view.eob = true;
|
||||
}
|
||||
}
|
||||
printf(","); // Show progress
|
||||
if (const size_t n = beams_state.common_prefix_length) {
|
||||
llama.generated_token_probs.resize(llama.generated_token_probs.size() + n);
|
||||
assert(0u < beams_state.n_beams);
|
||||
const llama_token * tokens = beams_state.beam_views[0].tokens;
|
||||
const auto map = [](llama_token tok) { return completion_token_output{{},tok}; };
|
||||
std::transform(tokens, tokens + n, llama.generated_token_probs.end() - n, map);
|
||||
printf("%zu", n);
|
||||
}
|
||||
fflush(stdout);
|
||||
#if 0 // DEBUG: print current beams for this iteration
|
||||
std::cout << "\n\nCurrent beams:\n";
|
||||
for (size_t i=0 ; i < beams_state.n_beams ; ++i) {
|
||||
std::cout << "beams["<<i<<"]: " << ostream_beam_view{state.ctx,beams_state.beam_views[i]} << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
struct token_translator {
|
||||
llama_context * ctx;
|
||||
std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
|
||||
std::string operator()(const completion_token_output & cto) const { return (*this)(cto.tok); }
|
||||
};
|
||||
|
||||
|
||||
// Appends the text of every entry in llama.generated_token_probs to
// llama.generated_text, growing the string's capacity once up front.
static void append_to_generated_text_from_generated_token_probs(llama_server_context &llama)
{
    const auto &entries = llama.generated_token_probs;
    const token_translator to_text{llama.ctx};

    // First pass: total number of bytes that will be appended.
    size_t extra = 0;
    for (const completion_token_output &entry : entries) {
        extra += to_text(entry).size();
    }

    const size_t needed = llama.generated_text.size() + extra;
    if (llama.generated_text.capacity() < needed) {
        llama.generated_text.reserve(needed);
    }

    // Second pass: append the pieces in order.
    for (const completion_token_output &entry : entries) {
        llama.generated_text += to_text(entry);
    }
}
|
||||
|
||||
// GRPC Server start
|
||||
class BackendServiceImpl final : public backend::Backend::Service {
|
||||
// The class has a llama instance that is shared across all RPCs
|
||||
llama_server_context llama;
|
||||
public:
|
||||
grpc::Status Health(ServerContext* context, const backend::HealthMessage* request, backend::Reply* reply) {
|
||||
// Implement Health RPC
|
||||
reply->set_message("OK");
|
||||
return Status::OK;
|
||||
}
|
||||
|
||||
grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) {
|
||||
// Implement LoadModel RPC
|
||||
gpt_params params;
|
||||
params_parse(request, params);
|
||||
|
||||
llama_backend_init(params.numa);
|
||||
|
||||
// load the model
|
||||
if (!llama.loadModel(params))
|
||||
{
|
||||
result->set_message("Failed loading model");
|
||||
result->set_success(false);
|
||||
return Status::CANCELLED;
|
||||
}
|
||||
result->set_message("Loading succeeded");
|
||||
result->set_success(true);
|
||||
return Status::OK;
|
||||
}
|
||||
grpc::Status PredictStream(grpc::ServerContext* context, const backend::PredictOptions* request, grpc::ServerWriter<backend::Reply>* writer) override {
|
||||
// Implement the streaming logic here based on the request options
|
||||
// You can use writer->Write(response) to send a reply to the client
|
||||
// and return grpc::Status::OK when the operation is complete.
|
||||
auto lock = llama.lock();
|
||||
|
||||
llama.rewind();
|
||||
|
||||
llama_reset_timings(llama.ctx);
|
||||
|
||||
parse_options_completion(false, request, llama);
|
||||
|
||||
llama.initSampling();
|
||||
llama.loadPrompt(request->prompt());
|
||||
llama.beginCompletion();
|
||||
size_t sent_count = 0;
|
||||
size_t sent_token_probs_index = 0;
|
||||
|
||||
while (llama.has_next_token) {
|
||||
const completion_token_output token_with_probs = llama.doCompletion();
|
||||
if (token_with_probs.tok == -1 || llama.multibyte_pending > 0) {
|
||||
continue;
|
||||
}
|
||||
const std::string token_text = llama_token_to_piece(llama.ctx, token_with_probs.tok);
|
||||
|
||||
size_t pos = std::min(sent_count, llama.generated_text.size());
|
||||
|
||||
const std::string str_test = llama.generated_text.substr(pos);
|
||||
bool is_stop_full = false;
|
||||
size_t stop_pos =
|
||||
llama.findStoppingStrings(str_test, token_text.size(), STOP_FULL);
|
||||
if (stop_pos != std::string::npos) {
|
||||
is_stop_full = true;
|
||||
llama.generated_text.erase(
|
||||
llama.generated_text.begin() + pos + stop_pos,
|
||||
llama.generated_text.end());
|
||||
pos = std::min(sent_count, llama.generated_text.size());
|
||||
} else {
|
||||
is_stop_full = false;
|
||||
stop_pos = llama.findStoppingStrings(str_test, token_text.size(),
|
||||
STOP_PARTIAL);
|
||||
}
|
||||
|
||||
if (
|
||||
stop_pos == std::string::npos ||
|
||||
// Send rest of the text if we are at the end of the generation
|
||||
(!llama.has_next_token && !is_stop_full && stop_pos > 0)
|
||||
) {
|
||||
const std::string to_send = llama.generated_text.substr(pos, std::string::npos);
|
||||
|
||||
sent_count += to_send.size();
|
||||
|
||||
std::vector<completion_token_output> probs_output = {};
|
||||
|
||||
if (llama.params.sparams.n_probs > 0) {
|
||||
const std::vector<llama_token> to_send_toks = llama_tokenize(llama.ctx, to_send, false);
|
||||
size_t probs_pos = std::min(sent_token_probs_index, llama.generated_token_probs.size());
|
||||
size_t probs_stop_pos = std::min(sent_token_probs_index + to_send_toks.size(), llama.generated_token_probs.size());
|
||||
if (probs_pos < probs_stop_pos) {
|
||||
probs_output = std::vector<completion_token_output>(llama.generated_token_probs.begin() + probs_pos, llama.generated_token_probs.begin() + probs_stop_pos);
|
||||
}
|
||||
sent_token_probs_index = probs_stop_pos;
|
||||
}
|
||||
backend::Reply reply;
|
||||
reply.set_message(to_send);
|
||||
|
||||
// Send the reply
|
||||
writer->Write(reply);
|
||||
}
|
||||
}
|
||||
|
||||
llama_print_timings(llama.ctx);
|
||||
|
||||
llama.mutex.unlock();
|
||||
lock.release();
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
|
||||
|
||||
grpc::Status Predict(ServerContext* context, const backend::PredictOptions* request, backend::Reply* reply) {
|
||||
auto lock = llama.lock();
|
||||
llama.rewind();
|
||||
llama_reset_timings(llama.ctx);
|
||||
parse_options_completion(false, request, llama);
|
||||
|
||||
llama.initSampling();
|
||||
llama.loadPrompt(request->prompt());
|
||||
llama.beginCompletion();
|
||||
|
||||
if (llama.params.n_beams) {
|
||||
// Fill llama.generated_token_probs vector with final beam.
|
||||
llama_beam_search(llama.ctx, beam_search_callback, &llama, llama.params.n_beams,
|
||||
llama.n_past, llama.n_remain);
|
||||
// Translate llama.generated_token_probs to llama.generated_text.
|
||||
append_to_generated_text_from_generated_token_probs(llama);
|
||||
} else {
|
||||
size_t stop_pos = std::string::npos;
|
||||
|
||||
while (llama.has_next_token) {
|
||||
const completion_token_output token_with_probs = llama.doCompletion();
|
||||
const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_piece(llama.ctx, token_with_probs.tok);
|
||||
|
||||
stop_pos = llama.findStoppingStrings(llama.generated_text,
|
||||
token_text.size(), STOP_FULL);
|
||||
}
|
||||
|
||||
if (stop_pos == std::string::npos) {
|
||||
stop_pos = llama.findStoppingStrings(llama.generated_text, 0, STOP_PARTIAL);
|
||||
}
|
||||
if (stop_pos != std::string::npos) {
|
||||
llama.generated_text.erase(llama.generated_text.begin() + stop_pos,
|
||||
llama.generated_text.end());
|
||||
}
|
||||
}
|
||||
|
||||
auto probs = llama.generated_token_probs;
|
||||
if (llama.params.sparams.n_probs > 0 && llama.stopped_word) {
|
||||
const std::vector<llama_token> stop_word_toks = llama_tokenize(llama.ctx, llama.stopping_word, false);
|
||||
probs = std::vector<completion_token_output>(llama.generated_token_probs.begin(), llama.generated_token_probs.end() - stop_word_toks.size());
|
||||
}
|
||||
reply->set_message(llama.generated_text);
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
};
|
||||
|
||||
void RunServer(const std::string& server_address) {
|
||||
BackendServiceImpl service;
|
||||
|
||||
ServerBuilder builder;
|
||||
builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
|
||||
builder.RegisterService(&service);
|
||||
|
||||
std::unique_ptr<Server> server(builder.BuildAndStart());
|
||||
std::cout << "Server listening on " << server_address << std::endl;
|
||||
server->Wait();
|
||||
}
|
||||
|
||||
// Entry point: parses the optional listen address (--addr=<address> or
// -a <address>, default "localhost:50051") and runs the gRPC server on it.
int main(int argc, char** argv) {
    std::string server_address("localhost:50051");

    // Long options accepted in addition to the short "-a".
    struct option long_options[] = {
        {"addr", required_argument, nullptr, 'a'},
        {nullptr, 0, nullptr, 0}
    };

    int long_index = 0;
    for (;;) {
        const int opt = getopt_long(argc, argv, "a:", long_options, &long_index);
        if (opt == -1) {
            break; // no more options
        }
        if (opt == 'a') {
            server_address = optarg;
            continue;
        }
        // Anything else is a usage error.
        std::cerr << "Usage: " << argv[0] << " [--addr=<address>] or [-a <address>]" << std::endl;
        return 1;
    }

    RunServer(server_address);
    return 0;
}
|
||||
@@ -1,23 +0,0 @@
|
||||
package main
|
||||
|
||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
bloomz "github.com/go-skynet/LocalAI/pkg/backend/llm/bloomz"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &bloomz.LLM{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
package main
|
||||
|
||||
// GRPC Falcon server
|
||||
|
||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
falcon "github.com/go-skynet/LocalAI/pkg/backend/llm/falcon"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &falcon.LLM{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -12,4 +12,5 @@ services:
|
||||
- .env
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
- ./images/:/tmp/generated/images/
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
# CPU .env docs: https://localai.io/howtos/easy-setup-docker-cpu/
|
||||
# GPU .env docs: https://localai.io/howtos/easy-setup-docker-gpu/
|
||||
|
||||
OPENAI_API_KEY=sk---anystringhere
|
||||
OPENAI_API_BASE=http://api:8080/v1
|
||||
# Models to preload at start
|
||||
# Here we configure gpt4all as gpt-3.5-turbo and bert as embeddings
|
||||
PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}, { "url": "github:go-skynet/model-gallery/bert-embeddings.yaml", "name": "text-embedding-ada-002"}]
|
||||
# Here we configure gpt4all as gpt-3.5-turbo and bert as embeddings,
|
||||
# see other options in the model gallery at https://github.com/go-skynet/model-gallery
|
||||
PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}, { "url": "github:go-skynet/model-gallery/bert-embeddings.yaml", "name": "text-embedding-ada-002"}]
|
||||
@@ -10,12 +10,16 @@ git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI/examples/autoGPT
|
||||
|
||||
cp -rfv .env.example .env
|
||||
|
||||
# Edit the .env file to set a different model by editing `PRELOAD_MODELS`.
|
||||
vim .env
|
||||
|
||||
docker-compose run --rm auto-gpt
|
||||
```
|
||||
|
||||
Note: The example automatically downloads the `gpt4all` model as it is under a permissive license. The GPT4All model does not seem to be enough to run AutoGPT. WizardLM-7b-uncensored seems to perform better (with `f16: true`).
|
||||
|
||||
See the `.env` configuration file to set a different model with the [model-gallery](https://github.com/go-skynet/model-gallery) by editing `PRELOAD_MODELS`.
|
||||
|
||||
## Without docker
|
||||
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
meta {
|
||||
name: backend monitor
|
||||
type: http
|
||||
seq: 4
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/monitor
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
meta {
|
||||
name: backend-shutdown
|
||||
type: http
|
||||
seq: 3
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/shutdown
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}"
|
||||
}
|
||||
}
|
||||
5
examples/bruno/LocalAI Test Requests/bruno.json
Normal file
5
examples/bruno/LocalAI Test Requests/bruno.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"version": "1",
|
||||
"name": "LocalAI Test Requests",
|
||||
"type": "collection"
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
vars {
|
||||
HOST: localhost
|
||||
PORT: 8080
|
||||
DEFAULT_MODEL: gpt-3.5-turbo
|
||||
PROTOCOL: http://
|
||||
}
|
||||
11
examples/bruno/LocalAI Test Requests/get models list.bru
Normal file
11
examples/bruno/LocalAI Test Requests/get models list.bru
Normal file
@@ -0,0 +1,11 @@
|
||||
meta {
|
||||
name: get models list
|
||||
type: http
|
||||
seq: 2
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
meta {
|
||||
name: Generate image
|
||||
type: http
|
||||
seq: 1
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/images/generations
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"prompt": "<positive prompt>|<negative prompt>",
|
||||
"model": "model-name",
|
||||
"step": 51,
|
||||
"size": "1024x1024",
|
||||
"image": ""
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
meta {
|
||||
name: -completions
|
||||
type: http
|
||||
seq: 4
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"prompt": "function downloadFile(string url, string outputPath) {",
|
||||
"max_tokens": 256,
|
||||
"temperature": 0.5
|
||||
}
|
||||
}
|
||||
23
examples/bruno/LocalAI Test Requests/llm text/-edits.bru
Normal file
23
examples/bruno/LocalAI Test Requests/llm text/-edits.bru
Normal file
@@ -0,0 +1,23 @@
|
||||
meta {
|
||||
name: -edits
|
||||
type: http
|
||||
seq: 5
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/edits
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"input": "What day of the wek is it?",
|
||||
"instruction": "Fix the spelling mistakes"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
meta {
|
||||
name: -embeddings
|
||||
type: http
|
||||
seq: 6
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/embeddings
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
meta {
|
||||
name: chat completion -simple- 1 message-
|
||||
type: http
|
||||
seq: 4
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "How could one use friction to cook an egg?"
|
||||
}
|
||||
],
|
||||
"max_tokens": 256,
|
||||
"temperature": 0.2,
|
||||
"grammar": ""
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
meta {
|
||||
name: chat-completions -long-
|
||||
type: http
|
||||
seq: 5
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"messages": [{"role": "system", "content": "You are a helpful, intelligent pirate assistant. Always answer as helpfully as possible, while being a pirate and using pirate language. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."},
|
||||
{"role": "user", "content": "How could one use electricity to cook an egg?"},
|
||||
{"role": "assistant",
|
||||
"content": "Shiver me timbers! Using electricity to cook an egg? Well, matey, I reckon that be a right curious idea! *adjusts spectacles* Now, I ain't sure if this be possible, but I'll do me best to help ye out.\n\nFirst things first, ye gotta understand that electricity be a powerful force, and it can't just cook an egg on its own. Ye see, electricity be like a mighty wind that can make things happen, but it needs somethin' to work with. So, if ye want to use electricity to cook an egg, ye gotta find a way to harness that power and make it do the cookin'.\n\nNow, I know what ye might be thinkin': \"How do I harness the power of electricity to cook an egg?\" Well, matey, there be a few ways to do it. One way be to use a special device called an \"electric frying pan.\" This be a pan that has a built-in heating element that gets hot when ye plug it into a wall socket. When the element gets hot, ye can crack an egg into the pan and watch as it cook"
|
||||
},
|
||||
{"role": "user", "content": "I don't have one of those, just a raw wire and plenty of power! How do we get it done?"}],
|
||||
"max_tokens": 1024,
|
||||
"temperature": 0.5
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
meta {
|
||||
name: chat-completions -stream-
|
||||
type: http
|
||||
seq: 6
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"messages": [{"role": "user", "content": "Explain how I can set sail on the ocean using only power generated by seagulls?"}],
|
||||
"max_tokens": 256,
|
||||
"temperature": 0.9,
|
||||
"stream": true
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
meta {
|
||||
name: add model gallery
|
||||
type: http
|
||||
seq: 10
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"url": "file:///home/dave/projects/model-gallery/huggingface/TheBloke__CodeLlama-7B-Instruct-GGML.yaml",
|
||||
"name": "test"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
meta {
|
||||
name: delete model gallery
|
||||
type: http
|
||||
seq: 11
|
||||
}
|
||||
|
||||
delete {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"name": "test"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
meta {
|
||||
name: list MODELS in galleries
|
||||
type: http
|
||||
seq: 7
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/available
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
meta {
|
||||
name: list model GALLERIES
|
||||
type: http
|
||||
seq: 8
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
meta {
|
||||
name: model gallery apply -gist-
|
||||
type: http
|
||||
seq: 12
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"id": "TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q2_K.bin"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
meta {
|
||||
name: model gallery apply
|
||||
type: http
|
||||
seq: 9
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"id": "dave@TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q3_K_S.bin",
|
||||
"name": "codellama7b"
|
||||
}
|
||||
}
|
||||
22
examples/bruno/LocalAI Test Requests/tts/-tts.bru
Normal file
22
examples/bruno/LocalAI Test Requests/tts/-tts.bru
Normal file
@@ -0,0 +1,22 @@
|
||||
meta {
|
||||
name: -tts
|
||||
type: http
|
||||
seq: 2
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||
}
|
||||
}
|
||||
1
examples/chatbot-ui-manual/models
Symbolic link
1
examples/chatbot-ui-manual/models
Symbolic link
@@ -0,0 +1 @@
|
||||
../models
|
||||
42
examples/configurations/README.md
Normal file
42
examples/configurations/README.md
Normal file
@@ -0,0 +1,42 @@
|
||||
## Advanced configuration
|
||||
|
||||
This section contains examples on how to install models manually with config files.
|
||||
|
||||
### Prerequisites
|
||||
|
||||
First clone LocalAI:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI
|
||||
```
|
||||
|
||||
Set up the model you prefer from the examples below and then start LocalAI:
|
||||
|
||||
```bash
|
||||
docker compose up -d --pull always
|
||||
```
|
||||
|
||||
If LocalAI is already started, you can restart it with
|
||||
|
||||
```bash
|
||||
docker compose restart
|
||||
```
|
||||
|
||||
See also the getting started guide: https://localai.io/basics/getting_started/
|
||||
|
||||
### Mistral
|
||||
|
||||
To set up Mistral, copy the files inside `mistral` into the `models` folder:
|
||||
|
||||
```bash
|
||||
cp -r examples/configurations/mistral/* models/
|
||||
```
|
||||
|
||||
Now download the model:
|
||||
|
||||
```bash
|
||||
wget https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q6_K.gguf -O models/mistral-7b-openorca.Q6_K.gguf
|
||||
```
|
||||
|
||||
3
examples/configurations/mistral/chatml-block.tmpl
Normal file
3
examples/configurations/mistral/chatml-block.tmpl
Normal file
@@ -0,0 +1,3 @@
|
||||
{{.Input}}
|
||||
<|im_start|>assistant
|
||||
|
||||
3
examples/configurations/mistral/chatml.tmpl
Normal file
3
examples/configurations/mistral/chatml.tmpl
Normal file
@@ -0,0 +1,3 @@
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{if .Content}}{{.Content}}{{end}}
|
||||
<|im_end|>
|
||||
16
examples/configurations/mistral/mistral.yaml
Normal file
16
examples/configurations/mistral/mistral.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
name: mistral
|
||||
mmap: true
|
||||
parameters:
|
||||
model: mistral-7b-openorca.Q6_K.gguf
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
top_p: 0.95
|
||||
template:
|
||||
chat_message: chatml
|
||||
chat: chatml-block
|
||||
completion: completion
|
||||
context_size: 4096
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
threads: 4
|
||||
@@ -1,3 +1,6 @@
|
||||
# CPU .env docs: https://localai.io/howtos/easy-setup-docker-cpu/
|
||||
# GPU .env docs: https://localai.io/howtos/easy-setup-docker-gpu/
|
||||
|
||||
OPENAI_API_KEY=x
|
||||
DISCORD_BOT_TOKEN=x
|
||||
DISCORD_CLIENT_ID=x
|
||||
|
||||
@@ -1 +1 @@
|
||||
../chatbot-ui/models/
|
||||
../models
|
||||
@@ -1,7 +1,11 @@
|
||||
# CPU .env docs: https://localai.io/howtos/easy-setup-docker-cpu/
|
||||
# GPU .env docs: https://localai.io/howtos/easy-setup-docker-gpu/
|
||||
|
||||
OPENAI_API_KEY=sk---anystringhere
|
||||
OPENAI_API_BASE=http://api:8080/v1
|
||||
# Models to preload at start
|
||||
# Here we configure gpt4all as gpt-3.5-turbo and bert as embeddings
|
||||
# Here we configure gpt4all as gpt-3.5-turbo and bert as embeddings,
|
||||
# see other options in the model gallery at https://github.com/go-skynet/model-gallery
|
||||
PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/openllama-7b-open-instruct.yaml", "name": "gpt-3.5-turbo"}]
|
||||
|
||||
## Change the default number of threads
|
||||
@@ -10,9 +10,12 @@ git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI/examples/functions
|
||||
|
||||
cp -rfv .env.example .env
|
||||
|
||||
# Edit the .env file to set a different model by editing `PRELOAD_MODELS`.
|
||||
vim .env
|
||||
|
||||
docker-compose run --rm functions
|
||||
```
|
||||
|
||||
Note: The example automatically downloads the `openllama` model as it is under a permissive license.
|
||||
|
||||
See the `.env` configuration file to set a different model with the [model-gallery](https://github.com/go-skynet/model-gallery) by editing `PRELOAD_MODELS`.
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
# CPU .env docs: https://localai.io/howtos/easy-setup-docker-cpu/
|
||||
# GPU .env docs: https://localai.io/howtos/easy-setup-docker-gpu/
|
||||
|
||||
THREADS=4
|
||||
CONTEXT_SIZE=512
|
||||
MODELS_PATH=/models
|
||||
|
||||
1
examples/langchain-chroma/models
Symbolic link
1
examples/langchain-chroma/models
Symbolic link
@@ -0,0 +1 @@
|
||||
../models
|
||||
@@ -1,16 +0,0 @@
|
||||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: ggml-gpt4all-j
|
||||
top_k: 80
|
||||
temperature: 0.2
|
||||
top_p: 0.7
|
||||
context_size: 1024
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "GPT:"
|
||||
roles:
|
||||
user: " "
|
||||
system: " "
|
||||
template:
|
||||
completion: completion
|
||||
chat: gpt4all
|
||||
@@ -1,4 +0,0 @@
|
||||
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
||||
### Prompt:
|
||||
{{.Input}}
|
||||
### Response:
|
||||
1
examples/langchain-huggingface/models
Symbolic link
1
examples/langchain-huggingface/models
Symbolic link
@@ -0,0 +1 @@
|
||||
../models
|
||||
@@ -1 +0,0 @@
|
||||
{{.Input}}
|
||||
@@ -1,17 +0,0 @@
|
||||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: gpt2
|
||||
top_k: 80
|
||||
temperature: 0.2
|
||||
top_p: 0.7
|
||||
context_size: 1024
|
||||
backend: "langchain-huggingface"
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "GPT:"
|
||||
roles:
|
||||
user: " "
|
||||
system: " "
|
||||
template:
|
||||
completion: completion
|
||||
chat: gpt4all
|
||||
@@ -1,4 +0,0 @@
|
||||
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
||||
### Prompt:
|
||||
{{.Input}}
|
||||
### Response:
|
||||
1
examples/langchain/models
Symbolic link
1
examples/langchain/models
Symbolic link
@@ -0,0 +1 @@
|
||||
../models
|
||||
@@ -1 +0,0 @@
|
||||
{{.Input}}
|
||||
@@ -1,17 +0,0 @@
|
||||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: ggml-gpt4all-j # ggml-koala-13B-4bit-128g
|
||||
top_k: 80
|
||||
temperature: 0.2
|
||||
top_p: 0.7
|
||||
context_size: 1024
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "GPT:"
|
||||
roles:
|
||||
user: " "
|
||||
system: " "
|
||||
backend: "gptj"
|
||||
template:
|
||||
completion: completion
|
||||
chat: gpt4all
|
||||
@@ -1,4 +0,0 @@
|
||||
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
||||
### Prompt:
|
||||
{{.Input}}
|
||||
### Response:
|
||||
@@ -8,8 +8,6 @@ services:
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- 8080:8080
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai"]
|
||||
|
||||
7
examples/models/.gitignore
vendored
Normal file
7
examples/models/.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
# Ignore everything but predefined models
|
||||
*
|
||||
!.gitignore
|
||||
!completion.tmpl
|
||||
!embeddings.yaml
|
||||
!gpt4all.tmpl
|
||||
!gpt-3.5-turbo.yaml
|
||||
1
examples/query_data/models
Symbolic link
1
examples/query_data/models
Symbolic link
@@ -0,0 +1 @@
|
||||
../models
|
||||
@@ -1 +0,0 @@
|
||||
{{.Input}}
|
||||
@@ -1,6 +0,0 @@
|
||||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: bert
|
||||
threads: 14
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
@@ -1,16 +0,0 @@
|
||||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: ggml-gpt4all-j
|
||||
top_k: 80
|
||||
temperature: 0.2
|
||||
top_p: 0.7
|
||||
context_size: 1024
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "GPT:"
|
||||
roles:
|
||||
user: " "
|
||||
system: " "
|
||||
template:
|
||||
completion: completion
|
||||
chat: gpt4all
|
||||
@@ -1,3 +1,6 @@
|
||||
# CPU .env docs: https://localai.io/howtos/easy-setup-docker-cpu/
|
||||
# GPU .env docs: https://localai.io/howtos/easy-setup-docker-gpu/
|
||||
|
||||
SLACK_APP_TOKEN=xapp-1-...
|
||||
SLACK_BOT_TOKEN=xoxb-...
|
||||
OPENAI_API_KEY=sk-...
|
||||
|
||||
@@ -18,7 +18,7 @@ git clone https://github.com/seratch/ChatGPT-in-Slack
|
||||
# Download gpt4all-j to models/
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# Set the discord bot options (see: https://github.com/seratch/ChatGPT-in-Slack)
|
||||
# Set the Slack bot options (see: https://github.com/seratch/ChatGPT-in-Slack)
|
||||
cp -rfv .env.example .env
|
||||
vim .env
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
../chatbot-ui/models
|
||||
../models
|
||||
@@ -1,3 +1,6 @@
|
||||
# CPU .env docs: https://localai.io/howtos/easy-setup-docker-cpu/
|
||||
# GPU .env docs: https://localai.io/howtos/easy-setup-docker-gpu/
|
||||
|
||||
# Create an app-level token with connections:write scope
|
||||
SLACK_APP_TOKEN=xapp-1-...
|
||||
# Install the app into your workspace to grab this token
|
||||
|
||||
53
go.mod
53
go.mod
@@ -9,9 +9,9 @@ require (
|
||||
github.com/go-skynet/bloomz.cpp v0.0.0-20230529155654-1834e77b83fa
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1
|
||||
github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230714203132-ffb09d7dd71e
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230916071220-b471eb7d8c93
|
||||
github.com/gofiber/fiber/v2 v2.49.2
|
||||
github.com/google/uuid v1.3.1
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428
|
||||
github.com/gofiber/fiber/v2 v2.50.0
|
||||
github.com/google/uuid v1.4.0
|
||||
github.com/hashicorp/go-multierror v1.1.1
|
||||
github.com/hpcloud/tail v1.0.0
|
||||
github.com/imdario/mergo v0.3.16
|
||||
@@ -20,17 +20,23 @@ require (
|
||||
github.com/mudler/go-ggllm.cpp v0.0.0-20230709223052-862477d16eef
|
||||
github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c
|
||||
github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af
|
||||
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230916184256-e86c63750de2
|
||||
github.com/onsi/ginkgo/v2 v2.12.1
|
||||
github.com/onsi/gomega v1.27.10
|
||||
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530
|
||||
github.com/onsi/ginkgo/v2 v2.13.0
|
||||
github.com/onsi/gomega v1.28.1
|
||||
github.com/otiai10/openaigo v1.6.0
|
||||
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
|
||||
github.com/rs/zerolog v1.30.0
|
||||
github.com/sashabaranov/go-openai v1.15.3
|
||||
github.com/tmc/langchaingo v0.0.0-20230923172951-2c309cf27956
|
||||
github.com/prometheus/client_golang v1.17.0
|
||||
github.com/rs/zerolog v1.31.0
|
||||
github.com/sashabaranov/go-openai v1.16.0
|
||||
github.com/schollz/progressbar/v3 v3.13.1
|
||||
github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701
|
||||
github.com/urfave/cli/v2 v2.25.7
|
||||
github.com/valyala/fasthttp v1.50.0
|
||||
google.golang.org/grpc v1.58.2
|
||||
go.opentelemetry.io/otel v1.19.0
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.42.0
|
||||
go.opentelemetry.io/otel/metric v1.19.0
|
||||
go.opentelemetry.io/otel/sdk/metric v1.19.0
|
||||
google.golang.org/grpc v1.59.0
|
||||
google.golang.org/protobuf v1.31.0
|
||||
gopkg.in/yaml.v2 v2.4.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
@@ -40,7 +46,7 @@ require (
|
||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
|
||||
github.com/shirou/gopsutil/v3 v3.23.8
|
||||
github.com/shirou/gopsutil/v3 v3.23.9
|
||||
github.com/shoenig/go-m1cpu v0.1.6 // indirect
|
||||
github.com/tklauser/go-sysconf v0.3.12 // indirect
|
||||
github.com/tklauser/numcpus v0.6.1 // indirect
|
||||
@@ -48,21 +54,30 @@ require (
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||
github.com/dlclark/regexp2 v1.8.1 // indirect
|
||||
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/golang/protobuf v1.5.3 // indirect
|
||||
github.com/golang/snappy v0.0.2 // indirect
|
||||
github.com/klauspost/pgzip v1.2.5 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/nwaples/rardecode v1.1.0 // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.2 // indirect
|
||||
github.com/pkoukk/tiktoken-go v0.1.2 // indirect
|
||||
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect
|
||||
github.com/prometheus/common v0.44.0 // indirect
|
||||
github.com/prometheus/procfs v0.11.1 // indirect
|
||||
github.com/ulikunitz/xz v0.5.9 // indirect
|
||||
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 // indirect
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
|
||||
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.19.0 // indirect
|
||||
golang.org/x/term v0.13.0 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
|
||||
gopkg.in/fsnotify.v1 v1.4.7 // indirect
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
|
||||
)
|
||||
@@ -74,7 +89,7 @@ require (
|
||||
github.com/go-audio/riff v1.0.0 // indirect
|
||||
github.com/go-logr/logr v1.2.4 // indirect
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
|
||||
github.com/google/go-cmp v0.5.9 // indirect
|
||||
github.com/google/go-cmp v0.6.0 // indirect
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
|
||||
github.com/hashicorp/errwrap v1.0.0 // indirect
|
||||
github.com/klauspost/compress v1.16.7 // indirect
|
||||
@@ -87,8 +102,8 @@ require (
|
||||
github.com/valyala/bytebufferpool v1.0.0 // indirect
|
||||
github.com/valyala/tcplisten v1.0.0 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
|
||||
golang.org/x/net v0.14.0 // indirect
|
||||
golang.org/x/sys v0.12.0 // indirect
|
||||
golang.org/x/text v0.12.0 // indirect
|
||||
golang.org/x/net v0.17.0 // indirect
|
||||
golang.org/x/sys v0.13.0 // indirect
|
||||
golang.org/x/text v0.13.0 // indirect
|
||||
golang.org/x/tools v0.12.0 // indirect
|
||||
)
|
||||
|
||||
127
go.sum
127
go.sum
@@ -1,13 +1,16 @@
|
||||
github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
|
||||
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
|
||||
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
|
||||
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
||||
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
|
||||
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
@@ -29,8 +32,11 @@ github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
|
||||
github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
|
||||
github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
|
||||
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
|
||||
github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
|
||||
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
github.com/go-skynet/bloomz.cpp v0.0.0-20230529155654-1834e77b83fa h1:gxr68r/6EWroay4iI81jxqGCDbKotY4+CiwdUkBz2NQ=
|
||||
@@ -39,14 +45,14 @@ github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 h1:yXvc7QfGt
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1/go.mod h1:fYjkCDRzC+oRLHSjQoajmYK6AmeJnmEanV27CClAcDc=
|
||||
github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230714203132-ffb09d7dd71e h1:4reMY29i1eOZaRaSTMPNyXI7X8RMNxCTfDDBXYzrbr0=
|
||||
github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230714203132-ffb09d7dd71e/go.mod h1:31j1odgFXP8hDSUVfH0zErKI5aYVP18ddYnPkwCso2A=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230916071220-b471eb7d8c93 h1:aJQn0Lb5TQtSXw5hK+kAUuaUjsXwDXyEItzI9U+GiOA=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230916071220-b471eb7d8c93/go.mod h1:VGPpMv2UMxRgBiDIFrs7AGMzrX884K0Vy8tqYu3if/Q=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo=
|
||||
github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gofiber/fiber/v2 v2.49.2 h1:ONEN3/Vc+dUCxxDgZZwpqvhISgHqb+bu+isBiEyKEQs=
|
||||
github.com/gofiber/fiber/v2 v2.49.2/go.mod h1:gNsKnyrmfEWFpJxQAV0qvW6l70K1dZGno12oLtukcts=
|
||||
github.com/gofiber/fiber/v2 v2.50.0 h1:ia0JaB+uw3GpNSCR5nvC5dsaxXjRU5OEu36aytx+zGw=
|
||||
github.com/gofiber/fiber/v2 v2.50.0/go.mod h1:21eytvay9Is7S6z+OgPi7c7n4++tnClWmhpimVHMimw=
|
||||
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
|
||||
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
|
||||
@@ -67,11 +73,15 @@ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
|
||||
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
|
||||
github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
|
||||
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4=
|
||||
github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
|
||||
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
|
||||
@@ -83,6 +93,7 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
|
||||
github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
|
||||
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||
github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
|
||||
github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
|
||||
@@ -90,27 +101,30 @@ github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs
|
||||
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
|
||||
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
|
||||
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
|
||||
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
|
||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
|
||||
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
|
||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
|
||||
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
|
||||
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
|
||||
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
|
||||
github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo=
|
||||
github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/mudler/go-ggllm.cpp v0.0.0-20230709223052-862477d16eef h1:OJZtJ5vYhlkTJI0RHIl62kOkhiINQEhZgsXlwmmNDhM=
|
||||
@@ -121,8 +135,12 @@ github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGw
|
||||
github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c/go.mod h1:gY3wyrhkRySJtmtI/JPt4a2mKv48h/M9pEZIW+SjeC0=
|
||||
github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6OUqsWQam0OrEr05okXsJK/TQur3zoZTHbiZD3Ks=
|
||||
github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
|
||||
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230916184256-e86c63750de2 h1:nBbgd4ORk4pubuh/kugHw5Sdytt+SxtJLEQ+vJfK6Ko=
|
||||
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230916184256-e86c63750de2/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
|
||||
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231013181651-22de3c56bdd4 h1:82J4t94Mmt0lva/OoxNlHkKrMSdSUZXkAjTFnlFFsow=
|
||||
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231013181651-22de3c56bdd4/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
|
||||
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231016205817-9a19c740ee84 h1:AiFzd+M2Uxz67fdn4nCnKR70me5yf88rXhoqhvfRDak=
|
||||
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231016205817-9a19c740ee84/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
|
||||
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530 h1:YXMxHwHMB9jCBo2Yu5gz3mTB3T1TnZs/HmPLv15LUSA=
|
||||
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
|
||||
github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
|
||||
github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
|
||||
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
|
||||
@@ -132,13 +150,15 @@ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W
|
||||
github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
|
||||
github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc=
|
||||
github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0=
|
||||
github.com/onsi/ginkgo/v2 v2.12.1 h1:uHNEO1RP2SpuZApSkel9nEh1/Mu+hmQe7Q+Pepg5OYA=
|
||||
github.com/onsi/ginkgo/v2 v2.12.1/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xlT/ETL/o=
|
||||
github.com/onsi/ginkgo/v2 v2.13.0 h1:0jY9lJquiL8fcf3M4LAXN5aMlS/b2BV86HFFPCPMgE4=
|
||||
github.com/onsi/ginkgo/v2 v2.13.0/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xlT/ETL/o=
|
||||
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
|
||||
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
|
||||
github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY=
|
||||
github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI=
|
||||
github.com/onsi/gomega v1.27.10/go.mod h1:RsS8tutOdbdgzbPtzzATp12yT7kM5I5aElG3evPbQ0M=
|
||||
github.com/onsi/gomega v1.28.0 h1:i2rg/p9n/UqIDAMFUJ6qIUUMcsqOuUHgbpbu235Vr1c=
|
||||
github.com/onsi/gomega v1.28.0/go.mod h1:A1H2JE76sI14WIP57LMKj7FVfCHx3g3BcZVjJG8bjX8=
|
||||
github.com/onsi/gomega v1.28.1 h1:MijcGUbfYuznzK/5R4CPNoUP/9Xvuo20sXfEm6XxoTA=
|
||||
github.com/onsi/gomega v1.28.1/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ=
|
||||
github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg=
|
||||
github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
|
||||
github.com/otiai10/openaigo v1.6.0 h1:YTQEbtDSvawETOB/Kmb/6JvuHdHH/eIpSQfHVufiwY8=
|
||||
@@ -154,18 +174,30 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
|
||||
github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q=
|
||||
github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY=
|
||||
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 h1:v7DLqVdK4VrYkVD5diGdl4sxJurKJEMnODWRJlxV9oM=
|
||||
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU=
|
||||
github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY=
|
||||
github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
|
||||
github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI=
|
||||
github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY=
|
||||
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
|
||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
|
||||
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
|
||||
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||
github.com/rs/zerolog v1.30.0 h1:SymVODrcRsaRaSInD9yQtKbtWqwsfoPcRff/oRXLj4c=
|
||||
github.com/rs/zerolog v1.30.0/go.mod h1:/tk+P47gFdPXq4QYjvCmT5/Gsug2nagsFWBWhAiSi1w=
|
||||
github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A=
|
||||
github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/sashabaranov/go-openai v1.15.3 h1:rzoNK9n+Cak+PM6OQ9puxDmFllxfnVea9StlmhglXqA=
|
||||
github.com/sashabaranov/go-openai v1.15.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/sashabaranov/go-openai v1.16.0 h1:34W6WV84ey6OpW0p2UewZkdMu82AxGC+BzpU6iiauRw=
|
||||
github.com/sashabaranov/go-openai v1.16.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE=
|
||||
github.com/schollz/progressbar/v3 v3.13.1/go.mod h1:xvrbki8kfT1fzWzBT/UZd9L6GA+jdL7HAgq2RFnO6fQ=
|
||||
github.com/shirou/gopsutil/v3 v3.23.7/go.mod h1:c4gnmoRC0hQuaLqvxnx1//VXQ0Ms/X9UnJF8pddY5z4=
|
||||
github.com/shirou/gopsutil/v3 v3.23.8 h1:xnATPiybo6GgdRoC4YoGnxXZFRc3dqQTGi73oLvvBrE=
|
||||
github.com/shirou/gopsutil/v3 v3.23.8/go.mod h1:7hmCaBn+2ZwaZOr6jmPBZDfawwMGuo1id3C6aM8EDqQ=
|
||||
github.com/shirou/gopsutil/v3 v3.23.9 h1:ZI5bWVeu2ep4/DIxB4U9okeYJ7zp/QLTO4auRb/ty/E=
|
||||
github.com/shirou/gopsutil/v3 v3.23.9/go.mod h1:x/NWSb71eMcjFIO0vhyGW5nZ7oSIgVjrCnADckb85GA=
|
||||
github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM=
|
||||
github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ=
|
||||
github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU=
|
||||
@@ -186,10 +218,10 @@ github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0h
|
||||
github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4=
|
||||
github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
|
||||
github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
|
||||
github.com/tmc/langchaingo v0.0.0-20230910230029-9c8845b2b019 h1:hKNvPanCqhpVszNBKYBzwknU2iJhhE29P04wpdpu/uY=
|
||||
github.com/tmc/langchaingo v0.0.0-20230910230029-9c8845b2b019/go.mod h1:vCdA1t5qnS5YPkDsznowOziBHFn0Ul11ZqfJ2GOAi0s=
|
||||
github.com/tmc/langchaingo v0.0.0-20230923172951-2c309cf27956 h1:N6H+Jd6Hf9vL57N25s0nHYYouDj4wUpLDd6rMNDfU+Y=
|
||||
github.com/tmc/langchaingo v0.0.0-20230923172951-2c309cf27956/go.mod h1:vCdA1t5qnS5YPkDsznowOziBHFn0Ul11ZqfJ2GOAi0s=
|
||||
github.com/tmc/langchaingo v0.0.0-20231016073620-a02d4fdc0f3a h1:BziGpoF5ZVWMDy6Z1adXnYndRye2fiYWZlmknUFksGA=
|
||||
github.com/tmc/langchaingo v0.0.0-20231016073620-a02d4fdc0f3a/go.mod h1:SiwyRS7sBSSi6f3NB4dKENw69X6br/wZ2WRkM+8pZWk=
|
||||
github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701 h1:LquLgmFiKf6eDXdwoUKCIGn5NsR34cLXC6ySYhiE6bA=
|
||||
github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701/go.mod h1:SiwyRS7sBSSi6f3NB4dKENw69X6br/wZ2WRkM+8pZWk=
|
||||
github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
|
||||
github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I=
|
||||
github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
|
||||
@@ -208,6 +240,18 @@ github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsr
|
||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFiw=
|
||||
github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
|
||||
go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs=
|
||||
go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY=
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.42.0 h1:jwV9iQdvp38fxXi8ZC+lNpxjK16MRcZlpDYvbuO1FiA=
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.42.0/go.mod h1:f3bYiqNqhoPxkvI2LrXqQVC546K7BuRDL/kKuxkujhA=
|
||||
go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE=
|
||||
go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8=
|
||||
go.opentelemetry.io/otel/sdk v1.19.0 h1:6USY6zH+L8uMH8L3t1enZPR3WFEmSTADlqldyHtJi3o=
|
||||
go.opentelemetry.io/otel/sdk v1.19.0/go.mod h1:NedEbbS4w3C6zElbLdPJKOpJQOrGUJ+GfzpjUvI0v1A=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.19.0 h1:EJoTO5qysMsYCa+w4UghwFV/ptQgqSL/8Ni+hx+8i1k=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.19.0/go.mod h1:XjG0jQyFJrv2PbMvwND7LwCEhsJzCzV5210euduKcKY=
|
||||
go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg=
|
||||
go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
@@ -220,9 +264,10 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
|
||||
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||
golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
|
||||
golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14=
|
||||
golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
|
||||
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
|
||||
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
|
||||
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
@@ -239,22 +284,24 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w
|
||||
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o=
|
||||
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
|
||||
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
|
||||
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
|
||||
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.12.0 h1:k+n5B8goJNdU7hSvEtMUz3d1Q6D/XW4COJSJR6fN0mc=
|
||||
golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
|
||||
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
@@ -266,10 +313,12 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 h1:bVf09lpb+OJbByTj913DRJioFFAjf/ZGxEz7MajTp2U=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98/go.mod h1:TUfxEVdsvPg18p6AslUXFoLdpED4oBnGwyqk3dV1XzM=
|
||||
google.golang.org/grpc v1.58.1 h1:OL+Vz23DTtrrldqHK49FUOPHyY75rvFqJfXC84NYW58=
|
||||
google.golang.org/grpc v1.58.1/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0=
|
||||
google.golang.org/grpc v1.58.2 h1:SXUpjxeVF3FKrTYQI4f4KvbGD5u2xccdYdurwowix5I=
|
||||
google.golang.org/grpc v1.58.2/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M=
|
||||
google.golang.org/grpc v1.58.3 h1:BjnpXut1btbtgN/6sp+brB2Kbm2LjNXnidYujAVbSoQ=
|
||||
google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0=
|
||||
google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk=
|
||||
google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98=
|
||||
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
||||
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
||||
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
||||
|
||||
205
main.go
205
main.go
@@ -1,6 +1,10 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
@@ -8,11 +12,16 @@ import (
|
||||
"syscall"
|
||||
|
||||
api "github.com/go-skynet/LocalAI/api"
|
||||
"github.com/go-skynet/LocalAI/api/backend"
|
||||
config "github.com/go-skynet/LocalAI/api/config"
|
||||
"github.com/go-skynet/LocalAI/api/options"
|
||||
"github.com/go-skynet/LocalAI/internal"
|
||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
progressbar "github.com/schollz/progressbar/v3"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
@@ -164,7 +173,6 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
|
||||
UsageText: `local-ai [options]`,
|
||||
Copyright: "Ettore Di Giacinto",
|
||||
Action: func(ctx *cli.Context) error {
|
||||
|
||||
opts := []options.AppOption{
|
||||
options.WithConfigFile(ctx.String("config-file")),
|
||||
options.WithJSONStringPreload(ctx.String("preload-models")),
|
||||
@@ -207,6 +215,12 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
|
||||
return err
|
||||
}
|
||||
|
||||
metrics, err := metrics.SetupMetrics()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts = append(opts, options.WithMetrics(metrics))
|
||||
|
||||
app, err := api.App(opts...)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -214,6 +228,195 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
|
||||
|
||||
return app.Listen(ctx.String("address"))
|
||||
},
|
||||
Commands: []*cli.Command{
|
||||
{
|
||||
Name: "models",
|
||||
Usage: "List or install models",
|
||||
Subcommands: []*cli.Command{
|
||||
{
|
||||
Name: "list",
|
||||
Usage: "List the models avaiable in your galleries",
|
||||
Action: func(ctx *cli.Context) error {
|
||||
var galleries []gallery.Gallery
|
||||
if err := json.Unmarshal([]byte(ctx.String("galleries")), &galleries); err != nil {
|
||||
log.Error().Msgf("unable to load galleries: %s", err.Error())
|
||||
}
|
||||
|
||||
models, err := gallery.AvailableGalleryModels(galleries, ctx.String("models-path"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, model := range models {
|
||||
if model.Installed {
|
||||
fmt.Printf(" * %s@%s (installed)\n", model.Gallery.Name, model.Name)
|
||||
} else {
|
||||
fmt.Printf(" - %s@%s\n", model.Gallery.Name, model.Name)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "install",
|
||||
Usage: "Install a model from the gallery",
|
||||
Action: func(ctx *cli.Context) error {
|
||||
modelName := ctx.Args().First()
|
||||
|
||||
var galleries []gallery.Gallery
|
||||
if err := json.Unmarshal([]byte(ctx.String("galleries")), &galleries); err != nil {
|
||||
log.Error().Msgf("unable to load galleries: %s", err.Error())
|
||||
}
|
||||
|
||||
progressBar := progressbar.NewOptions(
|
||||
1000,
|
||||
progressbar.OptionSetDescription(fmt.Sprintf("downloading model %s", modelName)),
|
||||
progressbar.OptionShowBytes(false),
|
||||
progressbar.OptionClearOnFinish(),
|
||||
)
|
||||
progressCallback := func(fileName string, current string, total string, percentage float64) {
|
||||
progressBar.Set(int(percentage * 10))
|
||||
}
|
||||
err = gallery.InstallModelFromGallery(galleries, modelName, ctx.String("models-path"), gallery.GalleryModel{}, progressCallback)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "tts",
|
||||
Usage: "Convert text to speech",
|
||||
Flags: []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "backend",
|
||||
Value: "piper",
|
||||
Aliases: []string{"b"},
|
||||
Usage: "Backend to run the TTS model",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "model",
|
||||
Aliases: []string{"m"},
|
||||
Usage: "Model name to run the TTS",
|
||||
Required: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "output-file",
|
||||
Aliases: []string{"o"},
|
||||
Usage: "The path to write the output wav file",
|
||||
},
|
||||
},
|
||||
Action: func(ctx *cli.Context) error {
|
||||
modelOption := ctx.String("model")
|
||||
if modelOption == "" {
|
||||
return errors.New("--model parameter is required")
|
||||
}
|
||||
backendOption := ctx.String("backend")
|
||||
if backendOption == "" {
|
||||
backendOption = "piper"
|
||||
}
|
||||
outputFile := ctx.String("output-file")
|
||||
outputDir := ctx.String("backend-assets-path")
|
||||
if outputFile != "" {
|
||||
outputDir = filepath.Dir(outputFile)
|
||||
}
|
||||
|
||||
text := strings.Join(ctx.Args().Slice(), " ")
|
||||
|
||||
opts := &options.Option{
|
||||
Loader: model.NewModelLoader(ctx.String("models-path")),
|
||||
Context: context.Background(),
|
||||
AudioDir: outputDir,
|
||||
AssetsDestination: ctx.String("backend-assets-path"),
|
||||
}
|
||||
|
||||
defer opts.Loader.StopAllGRPC()
|
||||
|
||||
filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, opts.Loader, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if outputFile != "" {
|
||||
if err := os.Rename(filePath, outputFile); err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Printf("Generate file %s\n", outputFile)
|
||||
} else {
|
||||
fmt.Printf("Generate file %s\n", filePath)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "transcript",
|
||||
Usage: "Convert audio to text",
|
||||
Flags: []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "backend",
|
||||
Value: "whisper",
|
||||
Aliases: []string{"b"},
|
||||
Usage: "Backend to run the transcription model",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "model",
|
||||
Aliases: []string{"m"},
|
||||
Usage: "Model name to run the transcription",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "language",
|
||||
Aliases: []string{"l"},
|
||||
Usage: "Language of the audio file",
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: "threads",
|
||||
Aliases: []string{"t"},
|
||||
Usage: "Threads to use",
|
||||
Value: 1,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "output-file",
|
||||
Aliases: []string{"o"},
|
||||
Usage: "The path to write the output wav file",
|
||||
},
|
||||
},
|
||||
Action: func(ctx *cli.Context) error {
|
||||
modelOption := ctx.String("model")
|
||||
filename := ctx.Args().First()
|
||||
language := ctx.String("language")
|
||||
threads := ctx.Int("threads")
|
||||
|
||||
opts := &options.Option{
|
||||
Loader: model.NewModelLoader(ctx.String("models-path")),
|
||||
Context: context.Background(),
|
||||
AssetsDestination: ctx.String("backend-assets-path"),
|
||||
}
|
||||
|
||||
cl := config.NewConfigLoader()
|
||||
if err := cl.LoadConfigs(ctx.String("models-path")); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c, exists := cl.GetConfig(modelOption)
|
||||
if !exists {
|
||||
return errors.New("model not found")
|
||||
}
|
||||
|
||||
c.Threads = threads
|
||||
|
||||
defer opts.Loader.StopAllGRPC()
|
||||
|
||||
tr, err := backend.ModelTranscription(filename, language, opts.Loader, c, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, segment := range tr.Segments {
|
||||
fmt.Println(segment.Start.String(), "-", segment.Text)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
err = app.Run(os.Args)
|
||||
|
||||
83
metrics/metrics.go
Normal file
83
metrics/metrics.go
Normal file
@@ -0,0 +1,83 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/adaptor"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/exporters/prometheus"
|
||||
api "go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/sdk/metric"
|
||||
)
|
||||
|
||||
type Metrics struct {
|
||||
meter api.Meter
|
||||
apiTimeMetric api.Float64Histogram
|
||||
}
|
||||
|
||||
// setupOTelSDK bootstraps the OpenTelemetry pipeline.
|
||||
// If it does not return an error, make sure to call shutdown for proper cleanup.
|
||||
func SetupMetrics() (*Metrics, error) {
|
||||
exporter, err := prometheus.New()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
provider := metric.NewMeterProvider(metric.WithReader(exporter))
|
||||
meter := provider.Meter("github.com/go-skynet/LocalAI")
|
||||
|
||||
apiTimeMetric, err := meter.Float64Histogram("api_call", api.WithDescription("api calls"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Metrics{
|
||||
meter: meter,
|
||||
apiTimeMetric: apiTimeMetric,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func MetricsHandler() fiber.Handler {
|
||||
return adaptor.HTTPHandler(promhttp.Handler())
|
||||
}
|
||||
|
||||
type apiMiddlewareConfig struct {
|
||||
Filter func(c *fiber.Ctx) bool
|
||||
metrics *Metrics
|
||||
}
|
||||
|
||||
func APIMiddleware(metrics *Metrics) fiber.Handler {
|
||||
cfg := apiMiddlewareConfig{
|
||||
metrics: metrics,
|
||||
Filter: func(c *fiber.Ctx) bool {
|
||||
if c.Path() == "/metrics" {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
},
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
if cfg.Filter != nil && cfg.Filter(c) {
|
||||
return c.Next()
|
||||
}
|
||||
path := c.Path()
|
||||
method := c.Method()
|
||||
|
||||
start := time.Now()
|
||||
err := c.Next()
|
||||
elapsed := float64(time.Since(start)) / float64(time.Second)
|
||||
cfg.metrics.ObserveAPICall(method, path, elapsed)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Metrics) ObserveAPICall(method string, path string, duration float64) {
|
||||
opts := api.WithAttributes(
|
||||
attribute.String("method", method),
|
||||
attribute.String("path", path),
|
||||
)
|
||||
m.apiTimeMetric.Record(context.Background(), duration, opts)
|
||||
}
|
||||
@@ -1,59 +0,0 @@
|
||||
package bloomz
|
||||
|
||||
// This is a wrapper to satisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
|
||||
"github.com/go-skynet/bloomz.cpp"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
base.SingleThread
|
||||
|
||||
bloomz *bloomz.Bloomz
|
||||
}
|
||||
|
||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||
model, err := bloomz.New(opts.ModelFile)
|
||||
llm.bloomz = model
|
||||
return err
|
||||
}
|
||||
|
||||
func buildPredictOptions(opts *pb.PredictOptions) []bloomz.PredictOption {
|
||||
predictOptions := []bloomz.PredictOption{
|
||||
bloomz.SetTemperature(float64(opts.Temperature)),
|
||||
bloomz.SetTopP(float64(opts.TopP)),
|
||||
bloomz.SetTopK(int(opts.TopK)),
|
||||
bloomz.SetTokens(int(opts.Tokens)),
|
||||
bloomz.SetThreads(int(opts.Threads)),
|
||||
}
|
||||
|
||||
if opts.Seed != 0 {
|
||||
predictOptions = append(predictOptions, bloomz.SetSeed(int(opts.Seed)))
|
||||
}
|
||||
|
||||
return predictOptions
|
||||
}
|
||||
|
||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
// fallback to Predict
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
go func() {
|
||||
res, err := llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
|
||||
if err != nil {
|
||||
fmt.Println("err: ", err)
|
||||
}
|
||||
results <- res
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,145 +0,0 @@
|
||||
package falcon
|
||||
|
||||
// This is a wrapper to satisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
|
||||
ggllm "github.com/mudler/go-ggllm.cpp"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
base.SingleThread
|
||||
|
||||
falcon *ggllm.Falcon
|
||||
}
|
||||
|
||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||
ggllmOpts := []ggllm.ModelOption{}
|
||||
if opts.ContextSize != 0 {
|
||||
ggllmOpts = append(ggllmOpts, ggllm.SetContext(int(opts.ContextSize)))
|
||||
}
|
||||
// F16 doesn't seem to produce good output at all!
|
||||
//if c.F16 {
|
||||
// llamaOpts = append(llamaOpts, llama.EnableF16Memory)
|
||||
//}
|
||||
|
||||
if opts.NGPULayers != 0 {
|
||||
ggllmOpts = append(ggllmOpts, ggllm.SetGPULayers(int(opts.NGPULayers)))
|
||||
}
|
||||
|
||||
ggllmOpts = append(ggllmOpts, ggllm.SetMMap(opts.MMap))
|
||||
ggllmOpts = append(ggllmOpts, ggllm.SetMainGPU(opts.MainGPU))
|
||||
ggllmOpts = append(ggllmOpts, ggllm.SetTensorSplit(opts.TensorSplit))
|
||||
if opts.NBatch != 0 {
|
||||
ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(int(opts.NBatch)))
|
||||
} else {
|
||||
ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(512))
|
||||
}
|
||||
|
||||
model, err := ggllm.New(opts.ModelFile, ggllmOpts...)
|
||||
llm.falcon = model
|
||||
return err
|
||||
}
|
||||
|
||||
func buildPredictOptions(opts *pb.PredictOptions) []ggllm.PredictOption {
|
||||
predictOptions := []ggllm.PredictOption{
|
||||
ggllm.SetTemperature(float64(opts.Temperature)),
|
||||
ggllm.SetTopP(float64(opts.TopP)),
|
||||
ggllm.SetTopK(int(opts.TopK)),
|
||||
ggllm.SetTokens(int(opts.Tokens)),
|
||||
ggllm.SetThreads(int(opts.Threads)),
|
||||
}
|
||||
|
||||
if opts.PromptCacheAll {
|
||||
predictOptions = append(predictOptions, ggllm.EnablePromptCacheAll)
|
||||
}
|
||||
|
||||
if opts.PromptCacheRO {
|
||||
predictOptions = append(predictOptions, ggllm.EnablePromptCacheRO)
|
||||
}
|
||||
|
||||
// Expected absolute path
|
||||
if opts.PromptCachePath != "" {
|
||||
predictOptions = append(predictOptions, ggllm.SetPathPromptCache(opts.PromptCachePath))
|
||||
}
|
||||
|
||||
if opts.Mirostat != 0 {
|
||||
predictOptions = append(predictOptions, ggllm.SetMirostat(int(opts.Mirostat)))
|
||||
}
|
||||
|
||||
if opts.MirostatETA != 0 {
|
||||
predictOptions = append(predictOptions, ggllm.SetMirostatETA(float64(opts.MirostatETA)))
|
||||
}
|
||||
|
||||
if opts.MirostatTAU != 0 {
|
||||
predictOptions = append(predictOptions, ggllm.SetMirostatTAU(float64(opts.MirostatTAU)))
|
||||
}
|
||||
|
||||
if opts.Debug {
|
||||
predictOptions = append(predictOptions, ggllm.Debug)
|
||||
}
|
||||
|
||||
predictOptions = append(predictOptions, ggllm.SetStopWords(opts.StopPrompts...))
|
||||
|
||||
if opts.PresencePenalty != 0 {
|
||||
predictOptions = append(predictOptions, ggllm.SetPenalty(float64(opts.PresencePenalty)))
|
||||
}
|
||||
|
||||
if opts.NKeep != 0 {
|
||||
predictOptions = append(predictOptions, ggllm.SetNKeep(int(opts.NKeep)))
|
||||
}
|
||||
|
||||
if opts.Batch != 0 {
|
||||
predictOptions = append(predictOptions, ggllm.SetBatch(int(opts.Batch)))
|
||||
}
|
||||
|
||||
if opts.IgnoreEOS {
|
||||
predictOptions = append(predictOptions, ggllm.IgnoreEOS)
|
||||
}
|
||||
|
||||
if opts.Seed != 0 {
|
||||
predictOptions = append(predictOptions, ggllm.SetSeed(int(opts.Seed)))
|
||||
}
|
||||
|
||||
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
|
||||
|
||||
predictOptions = append(predictOptions, ggllm.SetFrequencyPenalty(float64(opts.FrequencyPenalty)))
|
||||
predictOptions = append(predictOptions, ggllm.SetMlock(opts.MLock))
|
||||
predictOptions = append(predictOptions, ggllm.SetMemoryMap(opts.MMap))
|
||||
predictOptions = append(predictOptions, ggllm.SetPredictionMainGPU(opts.MainGPU))
|
||||
predictOptions = append(predictOptions, ggllm.SetPredictionTensorSplit(opts.TensorSplit))
|
||||
predictOptions = append(predictOptions, ggllm.SetTailFreeSamplingZ(float64(opts.TailFreeSamplingZ)))
|
||||
predictOptions = append(predictOptions, ggllm.SetTypicalP(float64(opts.TypicalP)))
|
||||
return predictOptions
|
||||
}
|
||||
|
||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
|
||||
predictOptions := buildPredictOptions(opts)
|
||||
|
||||
predictOptions = append(predictOptions, ggllm.SetTokenCallback(func(token string) bool {
|
||||
if token == "<|endoftext|>" {
|
||||
return true
|
||||
}
|
||||
results <- token
|
||||
return true
|
||||
}))
|
||||
|
||||
go func() {
|
||||
_, err := llm.falcon.Predict(opts.Prompt, predictOptions...)
|
||||
if err != nil {
|
||||
fmt.Println("err: ", err)
|
||||
}
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/imdario/mergo"
|
||||
"github.com/rs/zerolog/log"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
@@ -21,9 +22,31 @@ func InstallModelFromGallery(galleries []Gallery, name string, basePath string,
|
||||
applyModel := func(model *GalleryModel) error {
|
||||
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
|
||||
|
||||
config, err := GetGalleryConfigFromURL(model.URL)
|
||||
if err != nil {
|
||||
return err
|
||||
var config Config
|
||||
|
||||
if len(model.URL) > 0 {
|
||||
var err error
|
||||
config, err = GetGalleryConfigFromURL(model.URL)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else if len(model.ConfigFile) > 0 {
|
||||
// TODO: is this worse than using the override method with a blank cfg yaml?
|
||||
reYamlConfig, err := yaml.Marshal(model.ConfigFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
config = Config{
|
||||
ConfigFile: string(reYamlConfig),
|
||||
Description: model.Description,
|
||||
License: model.License,
|
||||
URLs: model.URLs,
|
||||
Name: model.Name,
|
||||
Files: make([]File, 0), // Real values get added below, must be blank
|
||||
// Prompt Template Skipped for now - I expect in this mode that they will be delivered as files.
|
||||
}
|
||||
} else {
|
||||
return fmt.Errorf("invalid gallery model %+v", model)
|
||||
}
|
||||
|
||||
installName := model.Name
|
||||
@@ -115,13 +138,38 @@ func AvailableGalleryModels(galleries []Gallery, basePath string) ([]*GalleryMod
|
||||
return models, nil
|
||||
}
|
||||
|
||||
func findGalleryURLFromReferenceURL(url string) (string, error) {
|
||||
var refFile string
|
||||
err := utils.GetURI(url, func(url string, d []byte) error {
|
||||
refFile = string(d)
|
||||
if len(refFile) == 0 {
|
||||
return fmt.Errorf("invalid reference file at url %s: %s", url, d)
|
||||
}
|
||||
cutPoint := strings.LastIndex(url, "/")
|
||||
refFile = url[:cutPoint+1] + refFile
|
||||
return nil
|
||||
})
|
||||
return refFile, err
|
||||
}
|
||||
|
||||
func getGalleryModels(gallery Gallery, basePath string) ([]*GalleryModel, error) {
|
||||
var models []*GalleryModel = []*GalleryModel{}
|
||||
|
||||
if strings.HasSuffix(gallery.URL, ".ref") {
|
||||
var err error
|
||||
gallery.URL, err = findGalleryURLFromReferenceURL(gallery.URL)
|
||||
if err != nil {
|
||||
return models, err
|
||||
}
|
||||
}
|
||||
|
||||
err := utils.GetURI(gallery.URL, func(url string, d []byte) error {
|
||||
return yaml.Unmarshal(d, &models)
|
||||
})
|
||||
if err != nil {
|
||||
if yamlErr, ok := err.(*yaml.TypeError); ok {
|
||||
log.Debug().Msgf("YAML errors: %s\n\nwreckage of models: %+v", strings.Join(yamlErr.Errors, "\n"), models)
|
||||
}
|
||||
return models, err
|
||||
}
|
||||
|
||||
|
||||
@@ -71,6 +71,7 @@ func GetGalleryConfigFromURL(url string) (Config, error) {
|
||||
return yaml.Unmarshal(d, &config)
|
||||
})
|
||||
if err != nil {
|
||||
log.Error().Msgf("GetGalleryConfigFromURL error for url %s\n%s", url, err.Error())
|
||||
return config, err
|
||||
}
|
||||
return config, nil
|
||||
|
||||
@@ -11,8 +11,9 @@ type GalleryModel struct {
|
||||
URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"`
|
||||
Icon string `json:"icon,omitempty" yaml:"icon,omitempty"`
|
||||
Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`
|
||||
|
||||
// Overrides are used to override the configuration of the model
|
||||
// config_file is read in the situation where URL is blank - and therefore this is a base config.
|
||||
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
|
||||
// Overrides are used to override the configuration of the model located at URL
|
||||
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
|
||||
// AdditionalFiles are used to add additional files to the model
|
||||
AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"`
|
||||
@@ -21,7 +22,3 @@ type GalleryModel struct {
|
||||
// Installed is used to indicate if the model is installed or not
|
||||
Installed bool `json:"installed,omitempty" yaml:"installed,omitempty"`
|
||||
}
|
||||
|
||||
const (
|
||||
githubURI = "github:"
|
||||
)
|
||||
|
||||
@@ -17,7 +17,7 @@ import (
|
||||
const (
|
||||
LlamaBackend = "llama"
|
||||
LlamaStableBackend = "llama-stable"
|
||||
BloomzBackend = "bloomz"
|
||||
LLamaCPP = "llama-cpp"
|
||||
StarcoderBackend = "starcoder"
|
||||
GPTJBackend = "gptj"
|
||||
DollyBackend = "dolly"
|
||||
@@ -29,7 +29,6 @@ const (
|
||||
Gpt4AllMptBackend = "gpt4all-mpt"
|
||||
Gpt4AllJBackend = "gpt4all-j"
|
||||
Gpt4All = "gpt4all"
|
||||
FalconBackend = "falcon"
|
||||
FalconGGMLBackend = "falcon-ggml"
|
||||
|
||||
BertEmbeddingsBackend = "bert-embeddings"
|
||||
@@ -41,10 +40,10 @@ const (
|
||||
)
|
||||
|
||||
var AutoLoadBackends []string = []string{
|
||||
LlamaBackend,
|
||||
LLamaCPP,
|
||||
LlamaStableBackend,
|
||||
LlamaBackend,
|
||||
Gpt4All,
|
||||
FalconBackend,
|
||||
GPTNeoXBackend,
|
||||
BertEmbeddingsBackend,
|
||||
FalconGGMLBackend,
|
||||
@@ -54,7 +53,6 @@ var AutoLoadBackends []string = []string{
|
||||
MPTBackend,
|
||||
ReplitBackend,
|
||||
StarcoderBackend,
|
||||
BloomzBackend,
|
||||
RwkvBackend,
|
||||
WhisperBackend,
|
||||
StableDiffusionBackend,
|
||||
@@ -175,11 +173,6 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err er
|
||||
}
|
||||
|
||||
switch backend {
|
||||
case LlamaBackend, LlamaStableBackend, GPTJBackend, DollyBackend,
|
||||
MPTBackend, Gpt2Backend, FalconBackend,
|
||||
GPTNeoXBackend, ReplitBackend, StarcoderBackend, BloomzBackend,
|
||||
RwkvBackend, LCHuggingFaceBackend, BertEmbeddingsBackend, FalconGGMLBackend, StableDiffusionBackend, WhisperBackend:
|
||||
return ml.LoadModel(o.model, ml.grpcModel(backend, o))
|
||||
case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:
|
||||
o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "gpt4all")
|
||||
return ml.LoadModel(o.model, ml.grpcModel(Gpt4All, o))
|
||||
@@ -187,7 +180,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err er
|
||||
o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data")
|
||||
return ml.LoadModel(o.model, ml.grpcModel(PiperBackend, o))
|
||||
default:
|
||||
return nil, fmt.Errorf("backend unsupported: %s", o.backendString)
|
||||
return ml.LoadModel(o.model, ml.grpcModel(backend, o))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
17
tests/e2e-fixtures/gpu.yaml
Normal file
17
tests/e2e-fixtures/gpu.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
context_size: 2048
|
||||
mirostat: 2
|
||||
mirostat_tau: 5.0
|
||||
mirostat_eta: 0.1
|
||||
f16: true
|
||||
threads: 1
|
||||
gpu_layers: 90
|
||||
name: gpt-4
|
||||
mmap: true
|
||||
parameters:
|
||||
model: ggllm-test-model.bin
|
||||
rope_freq_base: 10000
|
||||
max_tokens: 20
|
||||
rope_freq_scale: 1
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
top_p: 0.95
|
||||
18
tests/e2e/e2e_suite_test.go
Normal file
18
tests/e2e/e2e_suite_test.go
Normal file
@@ -0,0 +1,18 @@
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var (
|
||||
localAIURL = os.Getenv("LOCALAI_API")
|
||||
)
|
||||
|
||||
func TestLocalAI(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "LocalAI E2E test suite")
|
||||
}
|
||||
70
tests/e2e/e2e_test.go
Normal file
70
tests/e2e/e2e_test.go
Normal file
@@ -0,0 +1,70 @@
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
openaigo "github.com/otiai10/openaigo"
|
||||
"github.com/sashabaranov/go-openai"
|
||||
)
|
||||
|
||||
var _ = Describe("E2E test", func() {
|
||||
var client *openai.Client
|
||||
var client2 *openaigo.Client
|
||||
|
||||
Context("API with ephemeral models", func() {
|
||||
BeforeEach(func() {
|
||||
defaultConfig := openai.DefaultConfig("")
|
||||
defaultConfig.BaseURL = localAIURL
|
||||
|
||||
client2 = openaigo.NewClient("")
|
||||
client2.BaseURL = defaultConfig.BaseURL
|
||||
|
||||
// Wait for API to be ready
|
||||
client = openai.NewClientWithConfig(defaultConfig)
|
||||
Eventually(func() error {
|
||||
_, err := client.ListModels(context.TODO())
|
||||
return err
|
||||
}, "2m").ShouldNot(HaveOccurred())
|
||||
})
|
||||
|
||||
// Check that the GPU was used
|
||||
AfterEach(func() {
|
||||
cmd := exec.Command("/bin/bash", "-xce", "docker logs $(docker ps -q --filter ancestor=localai-tests)")
|
||||
out, err := cmd.CombinedOutput()
|
||||
Expect(err).ToNot(HaveOccurred(), string(out))
|
||||
// Execute docker logs $$(docker ps -q --filter ancestor=localai-tests) as a command and check the output
|
||||
if os.Getenv("BUILD_TYPE") == "cublas" {
|
||||
|
||||
Expect(string(out)).To(ContainSubstring("found 1 CUDA devices"), string(out))
|
||||
Expect(string(out)).To(ContainSubstring("using CUDA for GPU acceleration"), string(out))
|
||||
} else {
|
||||
fmt.Println("Skipping GPU check")
|
||||
Expect(string(out)).To(ContainSubstring("[llama-cpp] Loads OK"), string(out))
|
||||
Expect(string(out)).To(ContainSubstring("llama_model_loader"), string(out))
|
||||
}
|
||||
})
|
||||
|
||||
Context("Generates text", func() {
|
||||
It("streams chat tokens", func() {
|
||||
model := "gpt-4"
|
||||
resp, err := client.CreateChatCompletion(context.TODO(),
|
||||
openai.ChatCompletionRequest{
|
||||
Model: model, Messages: []openai.ChatCompletionMessage{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "How much is 2+2?",
|
||||
},
|
||||
}})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
|
||||
Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content))
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user