mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-23 16:20:01 -04:00
Compare commits
4 Commits
v2.10.1
...
docs_updat
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5b8d6a31e2 | ||
|
|
f0752be4aa | ||
|
|
bafc9effad | ||
|
|
d2934dd69f |
@@ -3,4 +3,4 @@ models
|
|||||||
examples/chatbot-ui/models
|
examples/chatbot-ui/models
|
||||||
examples/rwkv/models
|
examples/rwkv/models
|
||||||
examples/**/models
|
examples/**/models
|
||||||
Dockerfile*
|
Dockerfile
|
||||||
2
.github/workflows/image-pr.yml
vendored
2
.github/workflows/image-pr.yml
vendored
@@ -22,7 +22,6 @@ jobs:
|
|||||||
platforms: ${{ matrix.platforms }}
|
platforms: ${{ matrix.platforms }}
|
||||||
runs-on: ${{ matrix.runs-on }}
|
runs-on: ${{ matrix.runs-on }}
|
||||||
base-image: ${{ matrix.base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
makeflags: "-j3"
|
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
@@ -81,7 +80,6 @@ jobs:
|
|||||||
platforms: ${{ matrix.platforms }}
|
platforms: ${{ matrix.platforms }}
|
||||||
runs-on: ${{ matrix.runs-on }}
|
runs-on: ${{ matrix.runs-on }}
|
||||||
base-image: ${{ matrix.base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
makeflags: "-j3"
|
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
|
|||||||
2
.github/workflows/image.yml
vendored
2
.github/workflows/image.yml
vendored
@@ -26,7 +26,6 @@ jobs:
|
|||||||
platforms: ${{ matrix.platforms }}
|
platforms: ${{ matrix.platforms }}
|
||||||
runs-on: ${{ matrix.runs-on }}
|
runs-on: ${{ matrix.runs-on }}
|
||||||
base-image: ${{ matrix.base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
makeflags: "-j3"
|
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
@@ -200,7 +199,6 @@ jobs:
|
|||||||
platforms: ${{ matrix.platforms }}
|
platforms: ${{ matrix.platforms }}
|
||||||
runs-on: ${{ matrix.runs-on }}
|
runs-on: ${{ matrix.runs-on }}
|
||||||
base-image: ${{ matrix.base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
makeflags: "-j3"
|
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
|
|||||||
6
.github/workflows/image_build.yml
vendored
6
.github/workflows/image_build.yml
vendored
@@ -46,11 +46,6 @@ on:
|
|||||||
required: true
|
required: true
|
||||||
default: ''
|
default: ''
|
||||||
type: string
|
type: string
|
||||||
makeflags:
|
|
||||||
description: 'Make Flags'
|
|
||||||
required: false
|
|
||||||
default: ''
|
|
||||||
type: string
|
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername:
|
dockerUsername:
|
||||||
required: true
|
required: true
|
||||||
@@ -165,7 +160,6 @@ jobs:
|
|||||||
FFMPEG=${{ inputs.ffmpeg }}
|
FFMPEG=${{ inputs.ffmpeg }}
|
||||||
IMAGE_TYPE=${{ inputs.image-type }}
|
IMAGE_TYPE=${{ inputs.image-type }}
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
BASE_IMAGE=${{ inputs.base-image }}
|
||||||
MAKEFLAGS=${{ inputs.makeflags }}
|
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
platforms: ${{ inputs.platforms }}
|
platforms: ${{ inputs.platforms }}
|
||||||
|
|||||||
12
.github/workflows/test.yml
vendored
12
.github/workflows/test.yml
vendored
@@ -105,13 +105,9 @@ jobs:
|
|||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
GO_TAGS="stablediffusion tts" make test
|
GO_TAGS="stablediffusion tts" make test
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3
|
|
||||||
timeout-minutes: 5
|
|
||||||
|
|
||||||
tests-apple:
|
tests-apple:
|
||||||
runs-on: macOS-14
|
runs-on: macOS-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
go-version: ['1.21.x']
|
go-version: ['1.21.x']
|
||||||
@@ -134,8 +130,4 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
|
CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3
|
|
||||||
timeout-minutes: 5
|
|
||||||
40
Dockerfile
40
Dockerfile
@@ -63,9 +63,7 @@ WORKDIR /build
|
|||||||
RUN test -n "$TARGETARCH" \
|
RUN test -n "$TARGETARCH" \
|
||||||
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
||||||
|
|
||||||
###################################
|
# Extras requirements
|
||||||
###################################
|
|
||||||
|
|
||||||
FROM requirements-core as requirements-extras
|
FROM requirements-core as requirements-extras
|
||||||
|
|
||||||
RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
@@ -95,11 +93,8 @@ FROM requirements-${IMAGE_TYPE} as builder
|
|||||||
ARG GO_TAGS="stablediffusion tts"
|
ARG GO_TAGS="stablediffusion tts"
|
||||||
ARG GRPC_BACKENDS
|
ARG GRPC_BACKENDS
|
||||||
ARG BUILD_GRPC=true
|
ARG BUILD_GRPC=true
|
||||||
ARG MAKEFLAGS
|
|
||||||
|
|
||||||
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
||||||
ENV GO_TAGS=${GO_TAGS}
|
ENV GO_TAGS=${GO_TAGS}
|
||||||
ENV MAKEFLAGS=${MAKEFLAGS}
|
|
||||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||||
@@ -108,7 +103,6 @@ WORKDIR /build
|
|||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
COPY .git .
|
COPY .git .
|
||||||
RUN echo "GO_TAGS: $GO_TAGS"
|
|
||||||
RUN make prepare
|
RUN make prepare
|
||||||
|
|
||||||
# If we are building with clblas support, we need the libraries for the builds
|
# If we are building with clblas support, we need the libraries for the builds
|
||||||
@@ -122,10 +116,10 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
|
|||||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||||
|
|
||||||
RUN if [ "${BUILD_GRPC}" = "true" ]; then \
|
RUN if [ "${BUILD_GRPC}" = "true" ]; then \
|
||||||
git clone --recurse-submodules --jobs 4 -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||||
-DgRPC_BUILD_TESTS=OFF \
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
../.. && make install \
|
../.. && make -j12 install \
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
# Rebuild with defaults backends
|
# Rebuild with defaults backends
|
||||||
@@ -145,12 +139,10 @@ ARG FFMPEG
|
|||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
ARG IMAGE_TYPE=extras
|
ARG IMAGE_TYPE=extras
|
||||||
ARG MAKEFLAGS
|
|
||||||
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
ENV REBUILD=false
|
ENV REBUILD=false
|
||||||
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
||||||
ENV MAKEFLAGS=${MAKEFLAGS}
|
|
||||||
|
|
||||||
ARG CUDA_MAJOR_VERSION=11
|
ARG CUDA_MAJOR_VERSION=11
|
||||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
@@ -194,43 +186,43 @@ COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/
|
|||||||
|
|
||||||
## Duplicated from Makefile to avoid having a big layer that's hard to push
|
## Duplicated from Makefile to avoid having a big layer that's hard to push
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/autogptq \
|
make -C backend/python/autogptq \
|
||||||
; fi
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/bark \
|
make -C backend/python/bark \
|
||||||
; fi
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/diffusers \
|
make -C backend/python/diffusers \
|
||||||
; fi
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/vllm \
|
make -C backend/python/vllm \
|
||||||
; fi
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/mamba \
|
make -C backend/python/mamba \
|
||||||
; fi
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/sentencetransformers \
|
make -C backend/python/sentencetransformers \
|
||||||
; fi
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/transformers \
|
make -C backend/python/transformers \
|
||||||
; fi
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/vall-e-x \
|
make -C backend/python/vall-e-x \
|
||||||
; fi
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/exllama \
|
make -C backend/python/exllama \
|
||||||
; fi
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/exllama2 \
|
make -C backend/python/exllama2 \
|
||||||
; fi
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/petals \
|
make -C backend/python/petals \
|
||||||
; fi
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/transformers-musicgen \
|
make -C backend/python/transformers-musicgen \
|
||||||
; fi
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/coqui \
|
make -C backend/python/coqui \
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
# Make sure the models directory exists
|
# Make sure the models directory exists
|
||||||
|
|||||||
253
Makefile
253
Makefile
@@ -4,8 +4,11 @@ GOVET=$(GOCMD) vet
|
|||||||
BINARY_NAME=local-ai
|
BINARY_NAME=local-ai
|
||||||
|
|
||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
|
||||||
CPPLLAMA_VERSION?=d01b3c4c32357567f3531d4e6ceffc5d23e87583
|
|
||||||
|
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
|
||||||
|
|
||||||
|
CPPLLAMA_VERSION?=19885d205e768579ab090d1e99281cae58c21b54
|
||||||
|
|
||||||
# gpt4all version
|
# gpt4all version
|
||||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||||
@@ -16,13 +19,13 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
|||||||
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_CPP_VERSION?=a56f435fd475afd7edf02bfbf9f8c77f527198c2
|
WHISPER_CPP_VERSION?=37a709f6558c6d9783199e2b8cbb136e1c41d346
|
||||||
|
|
||||||
# bert.cpp version
|
# bert.cpp version
|
||||||
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
|
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
|
||||||
|
|
||||||
# go-piper version
|
# go-piper version
|
||||||
PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
|
PIPER_VERSION?=d6b6275ba037dabdba4a8b65dfdf6b2a73a67f07
|
||||||
|
|
||||||
# stablediffusion version
|
# stablediffusion version
|
||||||
STABLEDIFFUSION_VERSION?=362df9da29f882dbf09ade61972d16a1f53c3485
|
STABLEDIFFUSION_VERSION?=362df9da29f882dbf09ade61972d16a1f53c3485
|
||||||
@@ -35,7 +38,6 @@ export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
|||||||
export CMAKE_ARGS?=
|
export CMAKE_ARGS?=
|
||||||
|
|
||||||
CGO_LDFLAGS?=
|
CGO_LDFLAGS?=
|
||||||
CGO_LDFLAGS_WHISPER?=
|
|
||||||
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
||||||
GO_TAGS?=
|
GO_TAGS?=
|
||||||
BUILD_ID?=git
|
BUILD_ID?=git
|
||||||
@@ -70,7 +72,7 @@ UNAME_S := $(shell uname -s)
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(OS),Darwin)
|
ifeq ($(OS),Darwin)
|
||||||
|
CGO_LDFLAGS += -lcblas -framework Accelerate
|
||||||
ifeq ($(OSX_SIGNING_IDENTITY),)
|
ifeq ($(OSX_SIGNING_IDENTITY),)
|
||||||
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
||||||
endif
|
endif
|
||||||
@@ -81,12 +83,6 @@ ifeq ($(OS),Darwin)
|
|||||||
# disable metal if on Darwin and any other value is explicitly passed.
|
# disable metal if on Darwin and any other value is explicitly passed.
|
||||||
else ifneq ($(BUILD_TYPE),metal)
|
else ifneq ($(BUILD_TYPE),metal)
|
||||||
CMAKE_ARGS+=-DLLAMA_METAL=OFF
|
CMAKE_ARGS+=-DLLAMA_METAL=OFF
|
||||||
export LLAMA_NO_ACCELERATE=1
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),metal)
|
|
||||||
# -lcblas removed: it seems to always be listed as a duplicate flag.
|
|
||||||
CGO_LDFLAGS += -framework Accelerate
|
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
@@ -95,12 +91,10 @@ ifeq ($(BUILD_TYPE),openblas)
|
|||||||
export WHISPER_OPENBLAS=1
|
export WHISPER_OPENBLAS=1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),cublas)
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
|
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
|
||||||
export LLAMA_CUBLAS=1
|
export LLAMA_CUBLAS=1
|
||||||
export WHISPER_CUBLAS=1
|
export WHISPER_CUBLAS=1
|
||||||
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),hipblas)
|
ifeq ($(BUILD_TYPE),hipblas)
|
||||||
@@ -154,6 +148,7 @@ endif
|
|||||||
|
|
||||||
ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
|
ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
|
||||||
@@ -173,41 +168,40 @@ ifeq ($(BUILD_API_ONLY),true)
|
|||||||
GRPC_BACKENDS=
|
GRPC_BACKENDS=
|
||||||
endif
|
endif
|
||||||
|
|
||||||
.PHONY: all test build vendor get-sources prepare-sources prepare
|
.PHONY: all test build vendor
|
||||||
|
|
||||||
all: help
|
all: help
|
||||||
|
|
||||||
## BERT embeddings
|
|
||||||
sources/go-bert:
|
|
||||||
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
|
|
||||||
cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
|
|
||||||
|
|
||||||
sources/go-bert/libgobert.a: sources/go-bert
|
|
||||||
$(MAKE) -C sources/go-bert libgobert.a
|
|
||||||
|
|
||||||
## go-llama-ggml
|
|
||||||
sources/go-llama-ggml:
|
|
||||||
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
|
|
||||||
cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
|
|
||||||
|
|
||||||
sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
|
|
||||||
$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
|
||||||
|
|
||||||
## go-piper
|
|
||||||
sources/go-piper:
|
|
||||||
git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
|
|
||||||
cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
|
|
||||||
|
|
||||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
|
||||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
|
||||||
|
|
||||||
## GPT4ALL
|
## GPT4ALL
|
||||||
sources/gpt4all:
|
sources/gpt4all:
|
||||||
git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
|
git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
|
||||||
cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
|
cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
|
## go-piper
|
||||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
sources/go-piper:
|
||||||
|
git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
|
||||||
|
cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
|
## BERT embeddings
|
||||||
|
sources/go-bert:
|
||||||
|
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
|
||||||
|
cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
|
## stable diffusion
|
||||||
|
sources/go-stable-diffusion:
|
||||||
|
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
|
||||||
|
cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
|
sources/go-stable-diffusion/libstablediffusion.a:
|
||||||
|
$(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
|
||||||
|
|
||||||
|
## tiny-dream
|
||||||
|
sources/go-tiny-dream:
|
||||||
|
git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
|
||||||
|
cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
|
sources/go-tiny-dream/libtinydream.a:
|
||||||
|
$(MAKE) -C sources/go-tiny-dream libtinydream.a
|
||||||
|
|
||||||
## RWKV
|
## RWKV
|
||||||
sources/go-rwkv:
|
sources/go-rwkv:
|
||||||
@@ -217,23 +211,23 @@ sources/go-rwkv:
|
|||||||
sources/go-rwkv/librwkv.a: sources/go-rwkv
|
sources/go-rwkv/librwkv.a: sources/go-rwkv
|
||||||
cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
||||||
|
|
||||||
## stable diffusion
|
sources/go-bert/libgobert.a: sources/go-bert
|
||||||
sources/go-stable-diffusion:
|
$(MAKE) -C sources/go-bert libgobert.a
|
||||||
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
|
|
||||||
cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
|
|
||||||
|
|
||||||
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
|
backend-assets/gpt4all: sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
||||||
$(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
|
mkdir -p backend-assets/gpt4all
|
||||||
|
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
|
||||||
|
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
||||||
|
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
||||||
|
|
||||||
## tiny-dream
|
backend-assets/espeak-ng-data: sources/go-piper
|
||||||
sources/go-tiny-dream:
|
mkdir -p backend-assets/espeak-ng-data
|
||||||
git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
|
$(MAKE) -C sources/go-piper piper.o
|
||||||
cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
|
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
||||||
|
|
||||||
sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
|
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
|
||||||
$(MAKE) -C sources/go-tiny-dream libtinydream.a
|
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
||||||
|
|
||||||
## whisper
|
|
||||||
sources/whisper.cpp:
|
sources/whisper.cpp:
|
||||||
git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
|
git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
|
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
@@ -241,34 +235,47 @@ sources/whisper.cpp:
|
|||||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && make libwhisper.a
|
cd sources/whisper.cpp && make libwhisper.a
|
||||||
|
|
||||||
get-sources: sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
|
sources/go-llama:
|
||||||
|
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama
|
||||||
|
cd sources/go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
|
sources/go-llama-ggml:
|
||||||
|
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
|
||||||
|
cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
|
sources/go-llama/libbinding.a: sources/go-llama
|
||||||
|
$(MAKE) -C sources/go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
|
||||||
|
|
||||||
|
sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
|
||||||
|
$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||||
|
|
||||||
|
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||||
|
$(MAKE) -C sources/go-piper libpiper_binding.a example/main
|
||||||
|
|
||||||
|
backend/cpp/llama/llama.cpp:
|
||||||
|
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
|
||||||
|
|
||||||
|
get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
|
||||||
|
touch $@
|
||||||
|
|
||||||
replace:
|
replace:
|
||||||
|
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
|
||||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
|
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert
|
||||||
|
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
||||||
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
|
||||||
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
|
|
||||||
|
|
||||||
dropreplace:
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
|
|
||||||
|
|
||||||
prepare-sources: get-sources replace
|
prepare-sources: get-sources replace
|
||||||
$(GOCMD) mod download
|
$(GOCMD) mod download
|
||||||
|
touch $@
|
||||||
|
|
||||||
## GENERIC
|
## GENERIC
|
||||||
rebuild: ## Rebuilds the project
|
rebuild: ## Rebuilds the project
|
||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
|
$(MAKE) -C sources/go-llama clean
|
||||||
$(MAKE) -C sources/go-llama-ggml clean
|
$(MAKE) -C sources/go-llama-ggml clean
|
||||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
|
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
|
||||||
$(MAKE) -C sources/go-rwkv clean
|
$(MAKE) -C sources/go-rwkv clean
|
||||||
@@ -280,6 +287,7 @@ rebuild: ## Rebuilds the project
|
|||||||
$(MAKE) build
|
$(MAKE) build
|
||||||
|
|
||||||
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
||||||
|
touch $@
|
||||||
|
|
||||||
clean: ## Remove build related file
|
clean: ## Remove build related file
|
||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
@@ -290,15 +298,10 @@ clean: ## Remove build related file
|
|||||||
rm -rf backend-assets
|
rm -rf backend-assets
|
||||||
$(MAKE) -C backend/cpp/grpc clean
|
$(MAKE) -C backend/cpp/grpc clean
|
||||||
$(MAKE) -C backend/cpp/llama clean
|
$(MAKE) -C backend/cpp/llama clean
|
||||||
$(MAKE) dropreplace
|
|
||||||
|
|
||||||
clean-tests:
|
|
||||||
rm -rf test-models
|
|
||||||
rm -rf test-dir
|
|
||||||
rm -rf core/http/backend-assets
|
|
||||||
|
|
||||||
## Build:
|
## Build:
|
||||||
build: prepare backend-assets grpcs ## Build the project
|
|
||||||
|
build: backend-assets grpcs prepare ## Build the project
|
||||||
$(info ${GREEN}I local-ai build info:${RESET})
|
$(info ${GREEN}I local-ai build info:${RESET})
|
||||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||||
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
|
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
|
||||||
@@ -316,10 +319,10 @@ osx-signed: build
|
|||||||
run: prepare ## run local-ai
|
run: prepare ## run local-ai
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
|
||||||
|
|
||||||
test-models/testmodel.ggml:
|
test-models/testmodel:
|
||||||
mkdir test-models
|
mkdir test-models
|
||||||
mkdir test-dir
|
mkdir test-dir
|
||||||
wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
|
wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel
|
||||||
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||||
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||||
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||||
@@ -331,7 +334,7 @@ prepare-test: grpcs
|
|||||||
cp -rf backend-assets core/http
|
cp -rf backend-assets core/http
|
||||||
cp tests/models_fixtures/* test-models
|
cp tests/models_fixtures/* test-models
|
||||||
|
|
||||||
test: prepare test-models/testmodel.ggml grpcs
|
test: prepare test-models/testmodel grpcs
|
||||||
@echo 'Running tests'
|
@echo 'Running tests'
|
||||||
export GO_TAGS="tts stablediffusion"
|
export GO_TAGS="tts stablediffusion"
|
||||||
$(MAKE) prepare-test
|
$(MAKE) prepare-test
|
||||||
@@ -451,85 +454,87 @@ ifeq ($(BUILD_API_ONLY),true)
|
|||||||
touch backend-assets/keep
|
touch backend-assets/keep
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_binding.a
|
backend-assets/grpc:
|
||||||
mkdir -p backend-assets/espeak-ng-data
|
|
||||||
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
|
||||||
|
|
||||||
backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
|
||||||
mkdir -p backend-assets/gpt4all
|
|
||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
|
|
||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
|
||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
|
||||||
|
|
||||||
backend-assets/grpc: replace
|
|
||||||
mkdir -p backend-assets/grpc
|
mkdir -p backend-assets/grpc
|
||||||
|
|
||||||
backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc
|
backend-assets/grpc/llama: backend-assets/grpc sources/go-llama/libbinding.a
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama LIBRARY_PATH=$(CURDIR)/sources/go-llama \
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./backend/go/llm/llama/
|
||||||
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
|
# TODO: every binary should have its own folder instead, so can have different implementations
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
|
cp backend/cpp/llama/llama.cpp/ggml-metal.metal backend-assets/grpc/
|
||||||
|
endif
|
||||||
backend-assets/grpc/langchain-huggingface: backend-assets/grpc
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
|
|
||||||
|
|
||||||
backend/cpp/llama/llama.cpp:
|
|
||||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
|
|
||||||
|
|
||||||
|
## BACKEND CPP LLAMA START
|
||||||
|
# Sets the variables in case it has to build the gRPC locally.
|
||||||
INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
|
INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
|
||||||
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
|
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
|
||||||
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
|
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
|
||||||
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
|
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
|
||||||
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
|
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
|
||||||
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
|
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
|
||||||
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
|
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
|
||||||
|
|
||||||
backend/cpp/llama/grpc-server:
|
backend/cpp/llama/grpc-server:
|
||||||
# Conditionally build grpc for the llama backend to use if needed
|
|
||||||
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
|
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
|
||||||
$(MAKE) -C backend/cpp/grpc build
|
$(MAKE) -C backend/cpp/grpc build
|
||||||
_PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto \
|
export _PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto && \
|
||||||
_GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \
|
export _GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin && \
|
||||||
PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
|
export PATH="${INSTALLED_PACKAGES}/bin:${PATH}" && \
|
||||||
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
|
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
|
||||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) \
|
|
||||||
$(MAKE) -C backend/cpp/llama grpc-server
|
|
||||||
else
|
else
|
||||||
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
||||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
|
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
|
||||||
endif
|
endif
|
||||||
|
## BACKEND CPP LLAMA END
|
||||||
|
|
||||||
|
##
|
||||||
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
|
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
|
||||||
cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
|
cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
|
||||||
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||||
ifeq ($(BUILD_TYPE),metal)
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
cp backend/cpp/llama/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libbinding.a backend-assets/grpc
|
backend-assets/grpc/llama-ggml: backend-assets/grpc sources/go-llama-ggml/libbinding.a
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
||||||
|
|
||||||
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
|
backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
||||||
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
|
||||||
|
|
||||||
backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-assets/grpc
|
backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
|
||||||
|
|
||||||
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
|
backend-assets/grpc/bert-embeddings: backend-assets/grpc sources/go-bert/libgobert.a
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
|
||||||
|
|
||||||
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
|
backend-assets/grpc/langchain-huggingface: backend-assets/grpc
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
|
||||||
|
|
||||||
|
backend-assets/grpc/stablediffusion: backend-assets/grpc
|
||||||
|
if [ ! -f backend-assets/grpc/stablediffusion ]; then \
|
||||||
|
$(MAKE) sources/go-stable-diffusion; \
|
||||||
|
$(MAKE) sources/go-stable-diffusion/libstablediffusion.a; \
|
||||||
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
backend-assets/grpc/tinydream: backend-assets/grpc sources/go-tiny-dream/libtinydream.a
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
|
||||||
|
|
||||||
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
|
backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data sources/go-piper/libpiper_binding.a
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
||||||
|
|
||||||
|
backend-assets/grpc/whisper: backend-assets/grpc sources/whisper.cpp/libwhisper.a
|
||||||
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
|
||||||
|
|
||||||
grpcs: prepare $(GRPC_BACKENDS)
|
grpcs: prepare $(GRPC_BACKENDS)
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ $(INSTALLED_PACKAGES): grpc_build
|
|||||||
|
|
||||||
$(GRPC_REPO):
|
$(GRPC_REPO):
|
||||||
git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
|
git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
|
||||||
cd $(GRPC_REPO)/grpc && git submodule update --jobs 2 --init --recursive --depth $(GIT_CLONE_DEPTH)
|
cd $(GRPC_REPO)/grpc && git submodule update --init --recursive --depth $(GIT_CLONE_DEPTH)
|
||||||
|
|
||||||
$(GRPC_BUILD): $(GRPC_REPO)
|
$(GRPC_BUILD): $(GRPC_REPO)
|
||||||
mkdir -p $(GRPC_BUILD)
|
mkdir -p $(GRPC_BUILD)
|
||||||
|
|||||||
@@ -18,12 +18,6 @@ else ifeq ($(BUILD_TYPE),clblas)
|
|||||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||||
else ifeq ($(BUILD_TYPE),hipblas)
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
|
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
|
||||||
# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
|
|
||||||
# But if it's OSX without metal, disable it here
|
|
||||||
else ifeq ($(OS),darwin)
|
|
||||||
ifneq ($(BUILD_TYPE),metal)
|
|
||||||
CMAKE_ARGS+=-DLLAMA_METAL=OFF
|
|
||||||
endif
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||||
@@ -41,7 +35,7 @@ llama.cpp:
|
|||||||
fi
|
fi
|
||||||
cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
|
cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
llama.cpp/examples/grpc-server: llama.cpp
|
llama.cpp/examples/grpc-server:
|
||||||
mkdir -p llama.cpp/examples/grpc-server
|
mkdir -p llama.cpp/examples/grpc-server
|
||||||
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
|
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||||
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
|
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||||
|
|||||||
@@ -1084,7 +1084,7 @@ struct llama_server_context
|
|||||||
slot.has_next_token = false;
|
slot.has_next_token = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.tok == llama_token_eos(model))
|
if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model))
|
||||||
{
|
{
|
||||||
slot.stopped_eos = true;
|
slot.stopped_eos = true;
|
||||||
slot.has_next_token = false;
|
slot.has_next_token = false;
|
||||||
|
|||||||
@@ -30,7 +30,6 @@ dependencies:
|
|||||||
- async-timeout==4.0.3
|
- async-timeout==4.0.3
|
||||||
- attrs==23.1.0
|
- attrs==23.1.0
|
||||||
- bark==0.1.5
|
- bark==0.1.5
|
||||||
- bitsandbytes==0.43.0
|
|
||||||
- boto3==1.28.61
|
- boto3==1.28.61
|
||||||
- botocore==1.31.61
|
- botocore==1.31.61
|
||||||
- certifi==2023.7.22
|
- certifi==2023.7.22
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ if XPU:
|
|||||||
from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
|
from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
|
||||||
from transformers import AutoTokenizer, AutoModel, set_seed
|
from transformers import AutoTokenizer, AutoModel, set_seed
|
||||||
else:
|
else:
|
||||||
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig
|
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed
|
||||||
|
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
@@ -75,50 +75,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
A Result object that contains the result of the LoadModel operation.
|
A Result object that contains the result of the LoadModel operation.
|
||||||
"""
|
"""
|
||||||
model_name = request.Model
|
model_name = request.Model
|
||||||
|
|
||||||
compute = "auto"
|
|
||||||
if request.F16Memory == True:
|
|
||||||
compute=torch.bfloat16
|
|
||||||
|
|
||||||
self.CUDA = request.CUDA
|
|
||||||
|
|
||||||
device_map="cpu"
|
|
||||||
|
|
||||||
quantization = None
|
|
||||||
|
|
||||||
if self.CUDA:
|
|
||||||
if request.Device:
|
|
||||||
device_map=request.Device
|
|
||||||
else:
|
|
||||||
device_map="cuda:0"
|
|
||||||
if request.Quantization == "bnb_4bit":
|
|
||||||
quantization = BitsAndBytesConfig(
|
|
||||||
load_in_4bit = True,
|
|
||||||
bnb_4bit_compute_dtype = compute,
|
|
||||||
bnb_4bit_quant_type = "nf4",
|
|
||||||
bnb_4bit_use_double_quant = True,
|
|
||||||
load_in_8bit = False,
|
|
||||||
)
|
|
||||||
elif request.Quantization == "bnb_8bit":
|
|
||||||
quantization = BitsAndBytesConfig(
|
|
||||||
load_in_4bit=False,
|
|
||||||
bnb_4bit_compute_dtype = None,
|
|
||||||
load_in_8bit=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if request.Type == "AutoModelForCausalLM":
|
if request.Type == "AutoModelForCausalLM":
|
||||||
if XPU:
|
if XPU:
|
||||||
if quantization == "xpu_4bit":
|
|
||||||
xpu_4bit = True
|
|
||||||
self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode,
|
self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode,
|
||||||
device_map="xpu", load_in_4bit=xpu_4bit)
|
device_map="xpu", load_in_4bit=True)
|
||||||
else:
|
else:
|
||||||
self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, use_safetensors=True, quantization_config=quantization, device_map=device_map, torch_dtype=compute)
|
self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
|
||||||
else:
|
else:
|
||||||
self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, use_safetensors=True, quantization_config=quantization, device_map=device_map, torch_dtype=compute)
|
self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
|
||||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
|
|
||||||
|
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||||
|
self.CUDA = False
|
||||||
self.XPU = False
|
self.XPU = False
|
||||||
|
|
||||||
if XPU:
|
if XPU:
|
||||||
@@ -129,6 +97,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
except Exception as err:
|
except Exception as err:
|
||||||
print("Not using XPU:", err, file=sys.stderr)
|
print("Not using XPU:", err, file=sys.stderr)
|
||||||
|
|
||||||
|
if request.CUDA or torch.cuda.is_available():
|
||||||
|
try:
|
||||||
|
print("Loading model", model_name, "to CUDA.", file=sys.stderr)
|
||||||
|
self.model = self.model.to("cuda")
|
||||||
|
self.CUDA = True
|
||||||
|
except Exception as err:
|
||||||
|
print("Not using CUDA:", err, file=sys.stderr)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
# Implement your logic here for the LoadModel service
|
# Implement your logic here for the LoadModel service
|
||||||
@@ -155,17 +130,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
|
encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
|
||||||
|
|
||||||
# Create word embeddings
|
# Create word embeddings
|
||||||
if self.CUDA:
|
model_output = self.model(**encoded_input)
|
||||||
encoded_input = encoded_input.to("cuda")
|
|
||||||
|
|
||||||
with torch.no_grad():
|
|
||||||
model_output = self.model(**encoded_input)
|
|
||||||
|
|
||||||
# Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence
|
# Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence
|
||||||
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
|
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']).detach().numpy()
|
||||||
print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
|
print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
|
||||||
print("Embeddings:", sentence_embeddings, file=sys.stderr)
|
print("Embeddings:", sentence_embeddings, file=sys.stderr)
|
||||||
return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0])
|
return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings)
|
||||||
|
|
||||||
def Predict(self, request, context):
|
def Predict(self, request, context):
|
||||||
"""
|
"""
|
||||||
@@ -192,8 +163,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
if XPU:
|
if XPU:
|
||||||
inputs = inputs.to("xpu")
|
inputs = inputs.to("xpu")
|
||||||
|
|
||||||
outputs = self.model.generate(inputs,max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP, do_sample=True, pad_token_id=self.tokenizer.eos_token_id)
|
outputs = self.model.generate(inputs,max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP)
|
||||||
generated_text = self.tokenizer.batch_decode(outputs[:, inputs.shape[1]:], skip_special_tokens=True)[0]
|
|
||||||
|
generated_text = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
|
||||||
|
# Remove prompt from response if present
|
||||||
|
if request.Prompt in generated_text:
|
||||||
|
generated_text = generated_text.replace(request.Prompt, "")
|
||||||
|
|
||||||
return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
|
return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,10 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
|
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
|
||||||
|
if !backendConfig.Embeddings {
|
||||||
|
return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
|
||||||
|
}
|
||||||
|
|
||||||
modelFile := backendConfig.Model
|
modelFile := backendConfig.Model
|
||||||
|
|
||||||
grpcOpts := gRPCModelOpts(backendConfig)
|
grpcOpts := gRPCModelOpts(backendConfig)
|
||||||
|
|||||||
@@ -276,12 +276,8 @@ func (cfg *BackendConfig) SetDefaults(debug bool, threads, ctx int, f16 bool) {
|
|||||||
cfg.F16 = &f16
|
cfg.F16 = &f16
|
||||||
}
|
}
|
||||||
|
|
||||||
if cfg.Debug == nil {
|
|
||||||
cfg.Debug = &falseV
|
|
||||||
}
|
|
||||||
|
|
||||||
if debug {
|
if debug {
|
||||||
cfg.Debug = &trueV
|
cfg.Debug = &debug
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -666,15 +666,15 @@ var _ = Describe("API test", func() {
|
|||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
|
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
|
||||||
})
|
})
|
||||||
It("can generate completions via ggml", func() {
|
It("can generate completions", func() {
|
||||||
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
|
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: testPrompt})
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(len(resp.Choices)).To(Equal(1))
|
Expect(len(resp.Choices)).To(Equal(1))
|
||||||
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
||||||
})
|
})
|
||||||
|
|
||||||
It("can generate chat completions via ggml", func() {
|
It("can generate chat completions ", func() {
|
||||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
|
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(len(resp.Choices)).To(Equal(1))
|
Expect(len(resp.Choices)).To(Equal(1))
|
||||||
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
||||||
|
|||||||
@@ -185,14 +185,6 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
|
|||||||
config.RepeatPenalty = input.RepeatPenalty
|
config.RepeatPenalty = input.RepeatPenalty
|
||||||
}
|
}
|
||||||
|
|
||||||
if input.FrequencyPenalty!= 0 {
|
|
||||||
config.FrequencyPenalty = input.FrequencyPenalty
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.PresencePenalty!= 0 {
|
|
||||||
config.PresencePenalty = input.PresencePenalty
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.Keep != 0 {
|
if input.Keep != 0 {
|
||||||
config.Keep = input.Keep
|
config.Keep = input.Keep
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ type ChatCompletionResponseFormat struct {
|
|||||||
type OpenAIRequest struct {
|
type OpenAIRequest struct {
|
||||||
PredictionOptions
|
PredictionOptions
|
||||||
|
|
||||||
Context context.Context `json:"-"`
|
Context context.Context `json:"-"`
|
||||||
Cancel context.CancelFunc `json:"-"`
|
Cancel context.CancelFunc `json:"-"`
|
||||||
|
|
||||||
// whisper
|
// whisper
|
||||||
|
|||||||
@@ -25,7 +25,6 @@ type PredictionOptions struct {
|
|||||||
Keep int `json:"n_keep" yaml:"n_keep"`
|
Keep int `json:"n_keep" yaml:"n_keep"`
|
||||||
|
|
||||||
FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
|
FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
|
||||||
PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
|
|
||||||
TFZ float64 `json:"tfz" yaml:"tfz"`
|
TFZ float64 `json:"tfz" yaml:"tfz"`
|
||||||
|
|
||||||
TypicalP float64 `json:"typical_p" yaml:"typical_p"`
|
TypicalP float64 `json:"typical_p" yaml:"typical_p"`
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
{
|
{
|
||||||
"version": "v2.10.0"
|
"version": "v2.9.0"
|
||||||
}
|
}
|
||||||
|
|||||||
13
main.go
13
main.go
@@ -306,16 +306,11 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
|
|||||||
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
|
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
configdir := ctx.String("localai-config-dir")
|
closeConfigWatcherFn, err := startup.WatchConfigDirectory(ctx.String("localai-config-dir"), options)
|
||||||
// Watch the configuration directory
|
defer closeConfigWatcherFn()
|
||||||
// If the directory does not exist, we don't watch it
|
|
||||||
if _, err := os.Stat(configdir); err == nil {
|
|
||||||
closeConfigWatcherFn, err := startup.WatchConfigDirectory(ctx.String("localai-config-dir"), options)
|
|
||||||
defer closeConfigWatcherFn()
|
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed while watching configuration directory %s", ctx.String("localai-config-dir"))
|
return fmt.Errorf("failed while watching configuration directory %s", ctx.String("localai-config-dir"))
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
appHTTP, err := http.App(cl, ml, options)
|
appHTTP, err := http.App(cl, ml, options)
|
||||||
|
|||||||
@@ -15,11 +15,12 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var Aliases map[string]string = map[string]string{
|
var Aliases map[string]string = map[string]string{
|
||||||
"go-llama": LLamaCPP,
|
"go-llama": GoLlamaBackend,
|
||||||
"llama": LLamaCPP,
|
"llama": LLamaCPP,
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
GoLlamaBackend = "llama"
|
||||||
LlamaGGML = "llama-ggml"
|
LlamaGGML = "llama-ggml"
|
||||||
LLamaCPP = "llama-cpp"
|
LLamaCPP = "llama-cpp"
|
||||||
Gpt4AllLlamaBackend = "gpt4all-llama"
|
Gpt4AllLlamaBackend = "gpt4all-llama"
|
||||||
@@ -34,11 +35,15 @@ const (
|
|||||||
TinyDreamBackend = "tinydream"
|
TinyDreamBackend = "tinydream"
|
||||||
PiperBackend = "piper"
|
PiperBackend = "piper"
|
||||||
LCHuggingFaceBackend = "langchain-huggingface"
|
LCHuggingFaceBackend = "langchain-huggingface"
|
||||||
|
|
||||||
|
// External Backends that need special handling within LocalAI:
|
||||||
|
TransformersMusicGen = "transformers-musicgen"
|
||||||
)
|
)
|
||||||
|
|
||||||
var AutoLoadBackends []string = []string{
|
var AutoLoadBackends []string = []string{
|
||||||
LLamaCPP,
|
LLamaCPP,
|
||||||
LlamaGGML,
|
LlamaGGML,
|
||||||
|
GoLlamaBackend,
|
||||||
Gpt4All,
|
Gpt4All,
|
||||||
BertEmbeddingsBackend,
|
BertEmbeddingsBackend,
|
||||||
RwkvBackend,
|
RwkvBackend,
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
- name: list1
|
- name: list1
|
||||||
parameters:
|
parameters:
|
||||||
model: testmodel.ggml
|
model: testmodel
|
||||||
top_p: 80
|
top_p: 80
|
||||||
top_k: 0.9
|
top_k: 0.9
|
||||||
temperature: 0.1
|
temperature: 0.1
|
||||||
@@ -19,7 +19,7 @@
|
|||||||
top_p: 80
|
top_p: 80
|
||||||
top_k: 0.9
|
top_k: 0.9
|
||||||
temperature: 0.1
|
temperature: 0.1
|
||||||
model: testmodel.ggml
|
model: testmodel
|
||||||
context_size: 200
|
context_size: 200
|
||||||
stopwords:
|
stopwords:
|
||||||
- "HUMAN:"
|
- "HUMAN:"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
name: gpt4all
|
name: gpt4all
|
||||||
parameters:
|
parameters:
|
||||||
model: testmodel.ggml
|
model: testmodel
|
||||||
top_p: 80
|
top_p: 80
|
||||||
top_k: 0.9
|
top_k: 0.9
|
||||||
temperature: 0.1
|
temperature: 0.1
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
name: gpt4all-2
|
name: gpt4all-2
|
||||||
parameters:
|
parameters:
|
||||||
model: testmodel.ggml
|
model: testmodel
|
||||||
top_p: 80
|
top_p: 80
|
||||||
top_k: 0.9
|
top_k: 0.9
|
||||||
temperature: 0.1
|
temperature: 0.1
|
||||||
|
|||||||
Reference in New Issue
Block a user