Mirror of https://github.com/mudler/LocalAI.git, synced 2026-02-03 11:13:31 -05:00

Compare commits: v3.2.0 ... llama_cpp/ (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | 3f52776a1c |  |
.github/workflows/backend.yml (vendored): 40 changed lines
@@ -597,7 +597,7 @@ jobs:
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 backend: "piper"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 # bark-cpp
 - build-type: ''
@@ -610,7 +610,7 @@ jobs:
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 backend: "bark-cpp"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: ''
 cuda-major-version: ""
@@ -659,7 +659,7 @@ jobs:
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 backend: "stablediffusion-ggml"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: 'cublas'
 cuda-major-version: "12"
@@ -671,7 +671,7 @@ jobs:
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 backend: "stablediffusion-ggml"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: 'cublas'
 cuda-major-version: "11"
@@ -683,7 +683,7 @@ jobs:
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 backend: "stablediffusion-ggml"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: 'sycl_f32'
 cuda-major-version: ""
@@ -695,7 +695,7 @@ jobs:
 base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
 skip-drivers: 'false'
 backend: "stablediffusion-ggml"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: 'sycl_f16'
 cuda-major-version: ""
@@ -707,7 +707,7 @@ jobs:
 base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
 skip-drivers: 'false'
 backend: "stablediffusion-ggml"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: 'vulkan'
 cuda-major-version: ""
@@ -719,7 +719,7 @@ jobs:
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 backend: "stablediffusion-ggml"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: 'cublas'
 cuda-major-version: "12"
@@ -731,7 +731,7 @@ jobs:
 base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
 runs-on: 'ubuntu-24.04-arm'
 backend: "stablediffusion-ggml"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 # whisper
 - build-type: ''
@@ -744,7 +744,7 @@ jobs:
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 backend: "whisper"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: 'cublas'
 cuda-major-version: "12"
@@ -756,7 +756,7 @@ jobs:
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 backend: "whisper"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: 'cublas'
 cuda-major-version: "11"
@@ -768,7 +768,7 @@ jobs:
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 backend: "whisper"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: 'sycl_f32'
 cuda-major-version: ""
@@ -780,7 +780,7 @@ jobs:
 base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
 skip-drivers: 'false'
 backend: "whisper"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: 'sycl_f16'
 cuda-major-version: ""
@@ -792,7 +792,7 @@ jobs:
 base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
 skip-drivers: 'false'
 backend: "whisper"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: 'vulkan'
 cuda-major-version: ""
@@ -804,7 +804,7 @@ jobs:
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 backend: "whisper"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: 'cublas'
 cuda-major-version: "12"
@@ -816,7 +816,7 @@ jobs:
 base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
 runs-on: 'ubuntu-24.04-arm'
 backend: "whisper"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 - build-type: 'hipblas'
 cuda-major-version: ""
@@ -828,7 +828,7 @@ jobs:
 runs-on: 'ubuntu-latest'
 skip-drivers: 'false'
 backend: "whisper"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 #silero-vad
 - build-type: ''
@@ -841,7 +841,7 @@ jobs:
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 backend: "silero-vad"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 # local-store
 - build-type: ''
@@ -854,7 +854,7 @@ jobs:
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 backend: "local-store"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 # huggingface
 - build-type: ''
@@ -867,7 +867,7 @@ jobs:
 base-image: "ubuntu:22.04"
 skip-drivers: 'false'
 backend: "huggingface"
-dockerfile: "./backend/Dockerfile.golang"
+dockerfile: "./backend/Dockerfile.go"
 context: "./"
 llama-cpp-darwin:
 runs-on: macOS-14
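Each matrix entry above maps one backend to a container build. As a rough, hedged sketch (the reusable workflow's exact inputs are not shown in this diff), the piper entry corresponds to a docker invocation along these lines, mirroring the Makefile targets that appear later in this change set:

```bash
# Hypothetical manual equivalent of the piper matrix entry; the argument names
# are taken from the Makefile targets shown below, not from the workflow itself.
docker build \
  --build-arg BUILD_TYPE= \
  --build-arg BASE_IMAGE=ubuntu:22.04 \
  --build-arg BACKEND=piper \
  -t local-ai-backend:piper \
  -f backend/Dockerfile.go \
  .
```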
.github/workflows/bump_deps.yaml (vendored): 2 changed lines
@@ -21,7 +21,7 @@ jobs:
 variable: "BARKCPP_VERSION"
 branch: "main"
 file: "Makefile"
-- repository: "leejet/stable-diffusion.cpp"
+- repository: "richiejp/stable-diffusion.cpp"
 variable: "STABLEDIFFUSION_GGML_VERSION"
 branch: "master"
 file: "backend/go/stablediffusion-ggml/Makefile"
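The bump_deps matrix pairs an upstream repository with the Makefile variable that pins its commit. A hedged sketch of what such a bump amounts to, assuming the workflow simply rewrites the pinned value in the target file (the actual workflow steps are not part of this diff):

```bash
# Illustrative only: fetch the latest commit on the tracked branch and pin it.
REPO=richiejp/stable-diffusion.cpp
BRANCH=master
FILE=backend/go/stablediffusion-ggml/Makefile
VAR=STABLEDIFFUSION_GGML_VERSION

LATEST=$(git ls-remote "https://github.com/${REPO}" "refs/heads/${BRANCH}" | cut -f1)
sed -i "s/^${VAR}?=.*/${VAR}?=${LATEST}/" "${FILE}"
```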
.gitignore (vendored): 1 changed line
@@ -12,7 +12,6 @@ prepare-sources
 /backends
 /backend-images
 /result.yaml
 protoc
 
 *.log
 
Makefile: 130 changed lines
@@ -145,7 +145,7 @@ backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-sta
 
 backends/whisper: docker-build-whisper docker-save-whisper build
 	./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
 
 
 backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
 	./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
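The backends/<name> targets chain an image build, an export to a tarball, and an install into the local LocalAI checkout. A hedged sketch of the same flow done by hand for the whisper backend, assuming docker is available and the ./local-ai binary has been built:

```bash
# Equivalent of `make backends/whisper`, spelled out step by step.
make docker-build-whisper     # builds the local-ai-backend:whisper image
make docker-save-whisper      # writes backend-images/whisper.tar
make build                    # produces ./local-ai (prerequisite of the target)
./local-ai backends install "ocifile://$(pwd)/backend-images/whisper.tar"
```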
@@ -242,7 +242,10 @@ help: ## Show this help.
 ########################################################
 
 .PHONY: protogen
-protogen: protogen-go
+protogen: protogen-go protogen-python
 
+.PHONY: protogen-clean
+protogen-clean: protogen-go-clean protogen-python-clean
+
 protoc:
 	@OS_NAME=$$(uname -s | tr '[:upper:]' '[:lower:]'); \
@@ -287,6 +290,93 @@ protogen-go-clean:
 	$(RM) pkg/grpc/proto/backend.pb.go pkg/grpc/proto/backend_grpc.pb.go
 	$(RM) bin/*
 
+.PHONY: protogen-python
+protogen-python: bark-protogen coqui-protogen chatterbox-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
+
+.PHONY: protogen-python-clean
+protogen-python-clean: bark-protogen-clean coqui-protogen-clean chatterbox-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
+
+.PHONY: bark-protogen
+bark-protogen:
+	$(MAKE) -C backend/python/bark protogen
+
+.PHONY: bark-protogen-clean
+bark-protogen-clean:
+	$(MAKE) -C backend/python/bark protogen-clean
+
+.PHONY: coqui-protogen
+coqui-protogen:
+	$(MAKE) -C backend/python/coqui protogen
+
+.PHONY: coqui-protogen-clean
+coqui-protogen-clean:
+	$(MAKE) -C backend/python/coqui protogen-clean
+
+.PHONY: diffusers-protogen
+diffusers-protogen:
+	$(MAKE) -C backend/python/diffusers protogen
+
+.PHONY: chatterbox-protogen
+chatterbox-protogen:
+	$(MAKE) -C backend/python/chatterbox protogen
+
+.PHONY: diffusers-protogen-clean
+diffusers-protogen-clean:
+	$(MAKE) -C backend/python/diffusers protogen-clean
+
+.PHONY: chatterbox-protogen-clean
+chatterbox-protogen-clean:
+	$(MAKE) -C backend/python/chatterbox protogen-clean
+
+.PHONY: faster-whisper-protogen
+faster-whisper-protogen:
+	$(MAKE) -C backend/python/faster-whisper protogen
+
+.PHONY: faster-whisper-protogen-clean
+faster-whisper-protogen-clean:
+	$(MAKE) -C backend/python/faster-whisper protogen-clean
+
+.PHONY: exllama2-protogen
+exllama2-protogen:
+	$(MAKE) -C backend/python/exllama2 protogen
+
+.PHONY: exllama2-protogen-clean
+exllama2-protogen-clean:
+	$(MAKE) -C backend/python/exllama2 protogen-clean
+
+.PHONY: rerankers-protogen
+rerankers-protogen:
+	$(MAKE) -C backend/python/rerankers protogen
+
+.PHONY: rerankers-protogen-clean
+rerankers-protogen-clean:
+	$(MAKE) -C backend/python/rerankers protogen-clean
+
+.PHONY: transformers-protogen
+transformers-protogen:
+	$(MAKE) -C backend/python/transformers protogen
+
+.PHONY: transformers-protogen-clean
+transformers-protogen-clean:
+	$(MAKE) -C backend/python/transformers protogen-clean
+
+.PHONY: kokoro-protogen
+kokoro-protogen:
+	$(MAKE) -C backend/python/kokoro protogen
+
+.PHONY: kokoro-protogen-clean
+kokoro-protogen-clean:
+	$(MAKE) -C backend/python/kokoro protogen-clean
+
+.PHONY: vllm-protogen
+vllm-protogen:
+	$(MAKE) -C backend/python/vllm protogen
+
+.PHONY: vllm-protogen-clean
+vllm-protogen-clean:
+	$(MAKE) -C backend/python/vllm protogen-clean
+
+
 prepare-test-extra: protogen-python
 	$(MAKE) -C backend/python/transformers
 	$(MAKE) -C backend/python/diffusers
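With protogen-python wired into the default protogen target, the gRPC stubs for the Python backends can be regenerated all at once or per backend. A brief usage sketch based on the targets added above:

```bash
# Regenerate the Go and Python protobuf/gRPC stubs.
make protogen

# Or touch a single Python backend, e.g. transformers.
make transformers-protogen
make transformers-protogen-clean   # remove the generated stubs again
```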
@@ -359,19 +449,19 @@ backend-images:
 	mkdir -p backend-images
 
 docker-build-llama-cpp:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp .
+	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg IMAGE_BASE=$(IMAGE_BASE) -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp .
 
 docker-build-bark-cpp:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark-cpp -f backend/Dockerfile.golang --build-arg BACKEND=bark-cpp .
+	docker build -t local-ai-backend:bark-cpp -f backend/Dockerfile.go --build-arg BACKEND=bark-cpp .
 
 docker-build-piper:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:piper -f backend/Dockerfile.golang --build-arg BACKEND=piper .
+	docker build -t local-ai-backend:piper -f backend/Dockerfile.go --build-arg BACKEND=piper .
 
 docker-build-local-store:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:local-store -f backend/Dockerfile.golang --build-arg BACKEND=local-store .
+	docker build -t local-ai-backend:local-store -f backend/Dockerfile.go --build-arg BACKEND=local-store .
 
 docker-build-huggingface:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:huggingface -f backend/Dockerfile.golang --build-arg BACKEND=huggingface .
+	docker build -t local-ai-backend:huggingface -f backend/Dockerfile.go --build-arg BACKEND=huggingface .
 
 docker-save-huggingface: backend-images
 	docker save local-ai-backend:huggingface -o backend-images/huggingface.tar
@@ -380,7 +470,7 @@ docker-save-local-store: backend-images
 	docker save local-ai-backend:local-store -o backend-images/local-store.tar
 
 docker-build-silero-vad:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:silero-vad -f backend/Dockerfile.golang --build-arg BACKEND=silero-vad .
+	docker build -t local-ai-backend:silero-vad -f backend/Dockerfile.go --build-arg BACKEND=silero-vad .
 
 docker-save-silero-vad: backend-images
 	docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar
@@ -395,46 +485,46 @@ docker-save-bark-cpp: backend-images
 	docker save local-ai-backend:bark-cpp -o backend-images/bark-cpp.tar
 
 docker-build-stablediffusion-ggml:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:stablediffusion-ggml -f backend/Dockerfile.golang --build-arg BACKEND=stablediffusion-ggml .
+	docker build -t local-ai-backend:stablediffusion-ggml -f backend/Dockerfile.go --build-arg BACKEND=stablediffusion-ggml .
 
 docker-save-stablediffusion-ggml: backend-images
 	docker save local-ai-backend:stablediffusion-ggml -o backend-images/stablediffusion-ggml.tar
 
 docker-build-rerankers:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers .
+	docker build -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers .
 
 docker-build-vllm:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm .
+	docker build -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm .
 
 docker-build-transformers:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers .
+	docker build -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers .
 
 docker-build-diffusers:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers .
+	docker build -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers .
 
 docker-build-kokoro:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro .
+	docker build -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro .
 
 docker-build-whisper:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:whisper -f backend/Dockerfile.golang --build-arg BACKEND=whisper .
+	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:whisper -f backend/Dockerfile.go --build-arg BACKEND=whisper .
 
 docker-save-whisper: backend-images
 	docker save local-ai-backend:whisper -o backend-images/whisper.tar
 
 docker-build-faster-whisper:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper .
+	docker build -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper .
 
 docker-build-coqui:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui .
+	docker build -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui .
 
 docker-build-bark:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark .
+	docker build -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark .
 
 docker-build-chatterbox:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox .
+	docker build -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox .
 
 docker-build-exllama2:
-	docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 .
+	docker build -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 .
 
 docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-exllama2
@@ -11,6 +11,7 @@ ARG GRPC_MAKEFLAGS="-j4 -Otarget"
 ARG GRPC_VERSION=v1.65.0
 ARG CMAKE_FROM_SOURCE=false
 ARG CMAKE_VERSION=3.26.4
+ARG PROTOBUF_VERSION=v21.12
 
 ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
 
@@ -49,6 +50,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
     make install && \
     rm -rf /build
 
+RUN git clone --recurse-submodules --branch ${PROTOBUF_VERSION} https://github.com/protocolbuffers/protobuf.git && \
+    mkdir -p /build/protobuf/build && \
+    cd /build/protobuf/build && \
+    cmake -Dprotobuf_BUILD_SHARED_LIBS=ON -Dprotobuf_BUILD_TESTS=OFF .. && \
+    make && \
+    make install && \
+    rm -rf /build
+
 FROM ${BASE_IMAGE} AS builder
 ARG BACKEND=rerankers
 ARG BUILD_TYPE
@@ -180,21 +189,9 @@ COPY --from=grpc /opt/grpc /usr/local
 
 COPY . /LocalAI
 
-## Otherwise just run the normal build
-RUN <<EOT bash
-    if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
-        cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-fallback && \
-        make llama-cpp-grpc && make llama-cpp-rpc-server; \
-    else \
-        cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-avx && \
-        make llama-cpp-avx2 && \
-        make llama-cpp-avx512 && \
-        make llama-cpp-fallback && \
-        make llama-cpp-grpc && \
-        make llama-cpp-rpc-server; \
-    fi
-EOT
-
+RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp
+RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp-grpc
+RUN make -C /LocalAI/backend/cpp/llama-cpp llama-cpp-rpc-server
 # Copy libraries using a script to handle architecture differences
 RUN make -C /LocalAI/backend/cpp/llama-cpp package
@@ -17,6 +17,8 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
 include_directories("${HOMEBREW_DEFAULT_PREFIX}/include")
 endif()
 
+set(Protobuf_USE_STATIC_LIBS OFF)
+set(gRPC_USE_STATIC_LIBS OFF)
 find_package(absl CONFIG REQUIRED)
 find_package(Protobuf CONFIG REQUIRED)
 find_package(gRPC CONFIG REQUIRED)
@@ -1,5 +1,5 @@
 
-LLAMA_VERSION?=a86f52b2859dae4db5a7a0bbc0f1ad9de6b43ec6
+LLAMA_VERSION?=acd6cb1c41676f6bbb25c2a76fa5abeb1719301e
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 
 CMAKE_ARGS?=
@@ -7,10 +7,9 @@ BUILD_TYPE?=
 NATIVE?=false
 ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
 TARGET?=--target grpc-server
-JOBS?=$(shell nproc)
 
 # Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
-CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
+CMAKE_ARGS+=-DBUILD_SHARED_LIBS=ON -DLLAMA_CURL=OFF -DGGML_CPU_ALL_VARIANTS=ON -DGGML_BACKEND_DL=ON
 
 CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
 ifeq ($(NATIVE),false)
@@ -90,33 +89,12 @@ else
 	LLAMA_VERSION=$(LLAMA_VERSION) $(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
 endif
 
-llama-cpp-avx2: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build purge
-	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx2-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build/grpc-server llama-cpp-avx2
-
-llama-cpp-avx512: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build purge
-	$(info ${GREEN}I llama-cpp build info:avx512${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx512-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build/grpc-server llama-cpp-avx512
-
-llama-cpp-avx: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build purge
-	$(info ${GREEN}I llama-cpp build info:avx${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx
-
-llama-cpp-fallback: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build purge
-	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
+llama-cpp: llama.cpp
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build purge
+	$(info ${GREEN}I llama-cpp build info:${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS)" $(MAKE) VARIANT="llama-cpp-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-build/grpc-server llama-cpp
 
 llama-cpp-grpc: llama.cpp
 	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
@@ -161,8 +139,8 @@ grpc-server: llama.cpp llama.cpp/tools/grpc-server
 	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
 	+bash -c "source $(ONEAPI_VARS); \
-		cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release -j $(JOBS) $(TARGET)"
+		cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
 else
-	+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release -j $(JOBS) $(TARGET)
+	+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
 endif
 	cp llama.cpp/build/bin/grpc-server .
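With JOBS gone, the recipe leans on cmake's default parallelism, and the new CMAKE_ARGS switch the build to shared libraries with runtime-selected CPU variants. A hedged sketch of what the non-SYCL branch expands to under the new defaults:

```bash
# Roughly what `make grpc-server` runs for a plain CPU build after this change;
# the flags are the CMAKE_ARGS defaults shown above, not extra options.
( cd llama.cpp && mkdir -p build && cd build && \
  cmake .. -DBUILD_SHARED_LIBS=ON -DLLAMA_CURL=OFF \
           -DGGML_CPU_ALL_VARIANTS=ON -DGGML_BACKEND_DL=ON && \
  cmake --build . --config Release --target grpc-server )
cp llama.cpp/build/bin/grpc-server .
```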
@@ -6,34 +6,9 @@ CURDIR=$(dirname "$(realpath $0)")
 
 cd /
 
-echo "CPU info:"
-grep -e "model\sname" /proc/cpuinfo | head -1
-grep -e "flags" /proc/cpuinfo | head -1
-
-BINARY=llama-cpp-fallback
-
-if grep -q -e "\savx\s" /proc/cpuinfo ; then
-	echo "CPU: AVX found OK"
-	if [ -e $CURDIR/llama-cpp-avx ]; then
-		BINARY=llama-cpp-avx
-	fi
-fi
-
-if grep -q -e "\savx2\s" /proc/cpuinfo ; then
-	echo "CPU: AVX2 found OK"
-	if [ -e $CURDIR/llama-cpp-avx2 ]; then
-		BINARY=llama-cpp-avx2
-	fi
-fi
-
-# Check avx 512
-if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
-	echo "CPU: AVX512F found OK"
-	if [ -e $CURDIR/llama-cpp-avx512 ]; then
-		BINARY=llama-cpp-avx512
-	fi
-fi
+BINARY=llama-cpp
 
 ## P2P/GRPC mode
 if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
 	if [ -e $CURDIR/llama-cpp-grpc ]; then
 		BINARY=llama-cpp-grpc
@@ -56,6 +31,3 @@ fi
 
 echo "Using binary: $BINARY"
 exec $CURDIR/$BINARY "$@"
-
-# In case we fail execing, just run fallback
-exec $CURDIR/llama-cpp-fallback "$@"
@@ -18,8 +18,8 @@ GO_TAGS?=
 LD_FLAGS?=
 
 # stablediffusion.cpp (ggml)
-STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=1896b28ef2fd5b3643120e66979bea487385439f
+STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
+STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae
 
 # Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
 CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
@@ -91,18 +91,23 @@ endif
 # (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
 GGML_ARCHIVE_DIR := build/ggml/src/
 ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a')
-ALL_OBJS := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.o')
 
 # Name of the single merged library
 COMBINED_LIB := libggmlall.a
 
-# Instead of using the archives generated by GGML, use the object files directly to avoid overwriting objects with the same base name
 # Rule to merge all the .a files into one
 $(COMBINED_LIB): $(ALL_ARCHIVES)
-	@echo "Merging all .o into $(COMBINED_LIB): $(ALL_OBJS)"
+	@echo "Merging all .a into $(COMBINED_LIB)"
 	rm -f $@
-	ar -qc $@ $(ALL_OBJS)
+	mkdir -p merge-tmp
+	for a in $(ALL_ARCHIVES); do \
+		( cd merge-tmp && ar x ../$$a ); \
+	done
+	( cd merge-tmp && ar rcs ../$@ *.o )
+	# Ensure we have a proper index
+	ranlib $@
+	# Clean up
+	rm -rf merge-tmp
 
 build/libstable-diffusion.a:
 	@echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
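The new rule merges the per-backend ggml static archives by unpacking them into a scratch directory and re-archiving the member objects, instead of feeding loose .o files to ar. A small standalone sketch of that pattern (the archive names here are placeholders):

```bash
# Merge several static libraries into one; libfoo.a and libbar.a are placeholders.
rm -f libggmlall.a
mkdir -p merge-tmp
for a in libfoo.a libbar.a; do
  ( cd merge-tmp && ar x "../$a" )    # extract member objects
done
( cd merge-tmp && ar rcs ../libggmlall.a *.o )
ranlib libggmlall.a                   # ensure the archive has an index
rm -rf merge-tmp
```

Note that extracting multiple archives into one directory will overwrite members that share a base name, which is the trade-off the removed "use the object files directly" comment warned about.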
@@ -53,43 +53,9 @@ sd_ctx_t* sd_c;
 
 sample_method_t sample_method;
 
-// Copied from the upstream CLI
-void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
-    //SDParams* params = (SDParams*)data;
-    const char* level_str;
-
-    if (!log /*|| (!params->verbose && level <= SD_LOG_DEBUG)*/) {
-        return;
-    }
-
-    switch (level) {
-        case SD_LOG_DEBUG:
-            level_str = "DEBUG";
-            break;
-        case SD_LOG_INFO:
-            level_str = "INFO";
-            break;
-        case SD_LOG_WARN:
-            level_str = "WARN";
-            break;
-        case SD_LOG_ERROR:
-            level_str = "ERROR";
-            break;
-        default: /* Potential future-proofing */
-            level_str = "?????";
-            break;
-    }
-
-    fprintf(stderr, "[%-5s] ", level_str);
-    fputs(log, stderr);
-    fflush(stderr);
-}
-
 int load_model(char *model, char* options[], int threads, int diff) {
     fprintf (stderr, "Loading model!\n");
 
-    sd_set_log_callback(sd_log_cb, NULL);
-
     char *stableDiffusionModel = "";
     if (diff == 1 ) {
         stableDiffusionModel = model;
@@ -104,8 +70,6 @@ int load_model(char *model, char* options[], int threads, int diff) {
     char *scheduler = "";
     char *sampler = "";
 
-    fprintf(stderr, "parsing options\n");
-
     // If options is not NULL, parse options
     for (int i = 0; options[i] != NULL; i++) {
         char *optname = strtok(options[i], ":");
@@ -134,13 +98,10 @@ int load_model(char *model, char* options[], int threads, int diff) {
         }
     }
 
-    fprintf(stderr, "parsed options\n");
-
     int sample_method_found = -1;
-    for (int m = 0; m < SAMPLE_METHOD_COUNT; m++) {
+    for (int m = 0; m < N_SAMPLE_METHODS; m++) {
         if (!strcmp(sampler, sample_method_str[m])) {
             sample_method_found = m;
             fprintf(stderr, "Found sampler: %s\n", sampler);
         }
     }
     if (sample_method_found == -1) {
@@ -150,7 +111,7 @@ int load_model(char *model, char* options[], int threads, int diff) {
     sample_method = (sample_method_t)sample_method_found;
 
     int schedule_found = -1;
-    for (int d = 0; d < SCHEDULE_COUNT; d++) {
+    for (int d = 0; d < N_SCHEDULES; d++) {
         if (!strcmp(scheduler, schedule_str[d])) {
             schedule_found = d;
             fprintf (stderr, "Found scheduler: %s\n", scheduler);
@@ -164,28 +125,30 @@ int load_model(char *model, char* options[], int threads, int diff) {
     }
 
     schedule_t schedule = (schedule_t)schedule_found;
 
-
     fprintf (stderr, "Creating context\n");
-    sd_ctx_params_t ctx_params;
-    sd_ctx_params_init(&ctx_params);
-    ctx_params.model_path = model;
-    ctx_params.clip_l_path = clip_l_path;
-    ctx_params.clip_g_path = clip_g_path;
-    ctx_params.t5xxl_path = t5xxl_path;
-    ctx_params.diffusion_model_path = stableDiffusionModel;
-    ctx_params.vae_path = vae_path;
-    ctx_params.taesd_path = "";
-    ctx_params.control_net_path = "";
-    ctx_params.lora_model_dir = "";
-    ctx_params.embedding_dir = "";
-    ctx_params.stacked_id_embed_dir = "";
-    ctx_params.vae_decode_only = false;
-    ctx_params.vae_tiling = false;
-    ctx_params.free_params_immediately = false;
-    ctx_params.n_threads = threads;
-    ctx_params.rng_type = STD_DEFAULT_RNG;
-    ctx_params.schedule = schedule;
-    sd_ctx_t* sd_ctx = new_sd_ctx(&ctx_params);
+    sd_ctx_t* sd_ctx = new_sd_ctx(model,
+        clip_l_path,
+        clip_g_path,
+        t5xxl_path,
+        stableDiffusionModel,
+        vae_path,
+        "",
+        "",
+        "",
+        "",
+        "",
+        false,
+        false,
+        false,
+        threads,
+        SD_TYPE_COUNT,
+        STD_DEFAULT_RNG,
+        schedule,
+        false,
+        false,
+        false,
+        false);
 
     if (sd_ctx == NULL) {
         fprintf (stderr, "failed loading model (generic error)\n");
@@ -206,22 +169,29 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
 
     fprintf (stderr, "Generating image\n");
 
-    sd_img_gen_params_t p;
-    sd_img_gen_params_init(&p);
-
-    p.prompt = text;
-    p.negative_prompt = negativeText;
-    p.guidance.txt_cfg = cfg_scale;
-    p.guidance.slg.layers = skip_layers.data();
-    p.guidance.slg.layer_count = skip_layers.size();
-    p.width = width;
-    p.height = height;
-    p.sample_method = sample_method;
-    p.sample_steps = steps;
-    p.seed = seed;
-    p.input_id_images_path = "";
-
-    results = generate_image(sd_c, &p);
+    results = txt2img(sd_c,
+        text,
+        negativeText,
+        -1, //clip_skip
+        cfg_scale, // sfg_scale
+        3.5f,
+        0, // eta
+        width,
+        height,
+        sample_method,
+        steps,
+        seed,
+        1,
+        NULL,
+        0.9f,
+        20.f,
+        false,
+        "",
+        skip_layers.data(),
+        skip_layers.size(),
+        0,
+        0.01,
+        0.2);
 
     if (results == NULL) {
         fprintf (stderr, "NO results\n");
@@ -37,8 +37,8 @@ func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
 
 	size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
 	length := C.size_t(len(opts.Options))
-	options = (**C.char)(C.malloc((length + 1) * size))
-	view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0:len(opts.Options) + 1:len(opts.Options) + 1]
+	options = (**C.char)(C.malloc(length * size))
+	view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0:len(opts.Options):len(opts.Options)]
 
 	var diffusionModel int
 
@@ -66,7 +66,6 @@ func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
 	for i, x := range oo {
 		view[i] = C.CString(x)
 	}
-	view[len(oo)] = nil
 
 	sd.cfgScale = opts.CFGScale
 
@@ -2,8 +2,8 @@ package application
 
 import (
 	"github.com/mudler/LocalAI/core/config"
-	"github.com/mudler/LocalAI/core/templates"
 	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/templates"
 )
 
 type Application struct {
@@ -10,8 +10,8 @@ import (
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
 
-	coreStartup "github.com/mudler/LocalAI/core/startup"
 	"github.com/mudler/LocalAI/pkg/model"
+	pkgStartup "github.com/mudler/LocalAI/pkg/startup"
 	"github.com/mudler/LocalAI/pkg/xsysinfo"
 	"github.com/rs/zerolog/log"
 )
@@ -55,11 +55,11 @@ func New(opts ...config.AppOption) (*Application, error) {
 		}
 	}
 
-	if err := coreStartup.InstallModels(options.Galleries, options.BackendGalleries, options.ModelPath, options.BackendsPath, options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
+	if err := pkgStartup.InstallModels(options.Galleries, options.BackendGalleries, options.ModelPath, options.BackendsPath, options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
 		log.Error().Err(err).Msg("error installing models")
 	}
 
-	if err := coreStartup.InstallExternalBackends(options.BackendGalleries, options.BackendsPath, nil, options.ExternalBackends...); err != nil {
+	if err := pkgStartup.InstallExternalBackends(options.BackendGalleries, options.BackendsPath, nil, options.ExternalBackends...); err != nil {
 		log.Error().Err(err).Msg("error installing external backends")
 	}
 
@@ -8,7 +8,7 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 
 	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/mudler/LocalAI/core/startup"
+	"github.com/mudler/LocalAI/pkg/startup"
 	"github.com/rs/zerolog/log"
 	"github.com/schollz/progressbar/v3"
 )
@@ -9,8 +9,8 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 
 	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/mudler/LocalAI/core/startup"
 	"github.com/mudler/LocalAI/pkg/downloader"
+	"github.com/mudler/LocalAI/pkg/startup"
 	"github.com/rs/zerolog/log"
 	"github.com/schollz/progressbar/v3"
 )
@@ -72,7 +72,7 @@ func (u *CreateOCIImageCMD) Run(ctx *cliContext.Context) error {
 }
 
 func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
-	if len(u.Args) == 0 {
+	if u.Args == nil || len(u.Args) == 0 {
 		return fmt.Errorf("no GGUF file provided")
 	}
 	// We try to guess only if we don't have a template defined already
@@ -2,7 +2,7 @@ package gallery
 
 import (
 	"github.com/mudler/LocalAI/core/config"
-	"github.com/mudler/LocalAI/pkg/system"
+	"github.com/mudler/LocalAI/core/system"
 )
 
 // BackendMetadata represents the metadata stored in a JSON file for each installed backend
@@ -8,9 +8,9 @@ import (
 	"time"
 
 	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/system"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/model"
-	"github.com/mudler/LocalAI/pkg/system"
 	"github.com/rs/zerolog/log"
 )
@@ -7,7 +7,7 @@ import (
 	"runtime"
 
 	"github.com/mudler/LocalAI/core/config"
-	"github.com/mudler/LocalAI/pkg/system"
+	"github.com/mudler/LocalAI/core/system"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"gopkg.in/yaml.v2"
@@ -10,8 +10,8 @@ import (
 	"dario.cat/mergo"
 	"github.com/mudler/LocalAI/core/config"
 	lconfig "github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/system"
 	"github.com/mudler/LocalAI/pkg/downloader"
-	"github.com/mudler/LocalAI/pkg/system"
 	"github.com/mudler/LocalAI/pkg/utils"
 
 	"github.com/rs/zerolog/log"
@@ -15,8 +15,8 @@ import (
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/functions"
 
-	"github.com/mudler/LocalAI/core/templates"
 	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/templates"
 
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
@@ -175,7 +175,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
 	textContentToReturn = ""
 	id = uuid.New().String()
 	created = int(time.Now().Unix())
 
 	input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
 	if !ok || input.Model == "" {
 		return fiber.ErrBadRequest
@@ -15,9 +15,9 @@ import (
 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/schema"
-	"github.com/mudler/LocalAI/core/templates"
 	"github.com/mudler/LocalAI/pkg/functions"
 	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/templates"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
 )
@@ -12,8 +12,8 @@ import (
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/schema"
 
-	"github.com/mudler/LocalAI/core/templates"
 	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/templates"
 
 	"github.com/rs/zerolog/log"
 )
@@ -16,12 +16,12 @@ import (
 	"github.com/mudler/LocalAI/core/application"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/http/endpoints/openai/types"
-	"github.com/mudler/LocalAI/core/templates"
 	laudio "github.com/mudler/LocalAI/pkg/audio"
 	"github.com/mudler/LocalAI/pkg/functions"
 	"github.com/mudler/LocalAI/pkg/grpc/proto"
 	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/sound"
+	"github.com/mudler/LocalAI/pkg/templates"
 
 	"google.golang.org/grpc"
@@ -29,8 +29,8 @@ import (
 )
 
 const (
-	localSampleRate = 16000
-	remoteSampleRate = 24000
+	localSampleRate = 16000
+	remoteSampleRate = 24000
 )
 
 // A model can be "emulated" that is: transcribe audio to text -> feed text to the LLM -> generate audio as result
@@ -210,9 +210,9 @@ func registerRealtime(application *application.Application) func(c *websocket.Co
 	// TODO: Need some way to pass this to the backend
 	Threshold: 0.5,
 	// TODO: This is ignored and the amount of padding is random at present
-	PrefixPaddingMs: 30,
+	PrefixPaddingMs: 30,
 	SilenceDurationMs: 500,
-	CreateResponse: func() *bool { t := true; return &t }(),
+	CreateResponse: func() *bool { t := true; return &t }(),
 	},
 	},
 	InputAudioTranscription: &types.InputAudioTranscription{
@@ -233,7 +233,7 @@ func registerRealtime(application *application.Application) func(c *websocket.Co
 	// TODO: The API has no way to configure the VAD model or other models that make up a pipeline to fake any-to-any
 	// So possibly we could have a way to configure a composite model that can be used in situations where any-to-any is expected
 	pipeline := config.Pipeline{
-	VAD: "silero-vad",
+	VAD: "silero-vad",
 	Transcription: session.InputAudioTranscription.Model,
 	}
 
@@ -567,8 +567,8 @@ func updateTransSession(session *Session, update *types.ClientSession, cl *confi
 	trCur := session.InputAudioTranscription
 
 	if trUpd != nil && trUpd.Model != "" && trUpd.Model != trCur.Model {
-	pipeline := config.Pipeline{
-	VAD: "silero-vad",
+	pipeline := config.Pipeline {
+	VAD: "silero-vad",
 	Transcription: trUpd.Model,
 	}
@@ -684,7 +684,7 @@ func handleVAD(cfg *config.BackendConfig, evaluator *templates.Evaluator, sessio
 	sendEvent(c, types.InputAudioBufferClearedEvent{
 	ServerEventBase: types.ServerEventBase{
 	EventID: "event_TODO",
-	Type: types.ServerEventTypeInputAudioBufferCleared,
+	Type: types.ServerEventTypeInputAudioBufferCleared,
 	},
 	})
 
@@ -697,7 +697,7 @@ func handleVAD(cfg *config.BackendConfig, evaluator *templates.Evaluator, sessio
 	sendEvent(c, types.InputAudioBufferSpeechStartedEvent{
 	ServerEventBase: types.ServerEventBase{
 	EventID: "event_TODO",
-	Type: types.ServerEventTypeInputAudioBufferSpeechStarted,
+	Type: types.ServerEventTypeInputAudioBufferSpeechStarted,
 	},
 	AudioStartMs: time.Now().Sub(startTime).Milliseconds(),
 	})
@@ -719,7 +719,7 @@ func handleVAD(cfg *config.BackendConfig, evaluator *templates.Evaluator, sessio
 	sendEvent(c, types.InputAudioBufferSpeechStoppedEvent{
 	ServerEventBase: types.ServerEventBase{
 	EventID: "event_TODO",
-	Type: types.ServerEventTypeInputAudioBufferSpeechStopped,
+	Type: types.ServerEventTypeInputAudioBufferSpeechStopped,
 	},
 	AudioEndMs: time.Now().Sub(startTime).Milliseconds(),
 	})
@@ -728,9 +728,9 @@ func handleVAD(cfg *config.BackendConfig, evaluator *templates.Evaluator, sessio
 	sendEvent(c, types.InputAudioBufferCommittedEvent{
 	ServerEventBase: types.ServerEventBase{
 	EventID: "event_TODO",
-	Type: types.ServerEventTypeInputAudioBufferCommitted,
+	Type: types.ServerEventTypeInputAudioBufferCommitted,
 	},
-	ItemID: generateItemID(),
+	ItemID: generateItemID(),
 	PreviousItemID: "TODO",
 	})
@@ -833,9 +833,9 @@ func commitUtterance(ctx context.Context, utt []byte, cfg *config.BackendConfig,
 
 func runVAD(ctx context.Context, session *Session, adata []int16) ([]*proto.VADSegment, error) {
 	soundIntBuffer := &audio.IntBuffer{
-	Format: &audio.Format{SampleRate: localSampleRate, NumChannels: 1},
+	Format: &audio.Format{SampleRate: localSampleRate, NumChannels: 1},
 	SourceBitDepth: 16,
-	Data: sound.ConvertInt16ToInt(adata),
+	Data: sound.ConvertInt16ToInt(adata),
 	}
 
 	float32Data := soundIntBuffer.AsFloat32Buffer().Data
@@ -11,9 +11,9 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
-	"github.com/mudler/LocalAI/core/templates"
 	"github.com/mudler/LocalAI/pkg/functions"
 	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/templates"
 	"github.com/mudler/LocalAI/pkg/utils"
 
 	"github.com/gofiber/fiber/v2"
@@ -278,7 +278,6 @@ func ensureService(ctx context.Context, n *node.Node, nd *NodeData, sserv string
 	port, err := freeport.GetFreePort()
 	if err != nil {
 		zlog.Error().Err(err).Msgf("Could not allocate a free port for %s", nd.ID)
-		cancel()
 		return
 	}
 
@@ -2,7 +2,7 @@ package services
 
 import (
 	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/mudler/LocalAI/pkg/system"
+	"github.com/mudler/LocalAI/core/system"
 
 	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
@@ -7,8 +7,8 @@ import (
 
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/system"
 	"github.com/mudler/LocalAI/pkg/model"
-	"github.com/mudler/LocalAI/pkg/system"
 	"github.com/rs/zerolog/log"
 )
 
@@ -7,7 +7,7 @@ import (
 
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/mudler/LocalAI/pkg/system"
+	"github.com/mudler/LocalAI/core/system"
 	"github.com/mudler/LocalAI/pkg/utils"
 	"gopkg.in/yaml.v2"
 )
@@ -96,8 +96,8 @@ Your backend container should:
 For getting started, see the available backends in LocalAI here: https://github.com/mudler/LocalAI/tree/master/backend .
 
 - For Python based backends there is a template that can be used as starting point: https://github.com/mudler/LocalAI/tree/master/backend/python/common/template .
-- For Golang based backends, you can see the `bark-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/go/bark-cpp
-- For C++ based backends, you can see the `llama-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/cpp/llama-cpp
+- For Golang based backends, you can see the `bark-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/go/bark
+- For C++ based backends, you can see the `llama-cpp` backend as an example: https://github.com/mudler/LocalAI/tree/master/backend/cpp/llama
 
 ### Publishing Your Backend
@@ -154,7 +154,7 @@ For instructions on using AIO images, see [Using container images]({{% relref "d
 
 LocalAI is part of the Local family stack, along with LocalAGI and LocalRecall.
 
-[LocalAGI](https://github.com/mudler/LocalAGI) is a powerful, self-hostable AI Agent platform designed for maximum privacy and flexibility which encompassess and uses all the software stack. It provides a complete drop-in replacement for OpenAI's Responses APIs with advanced agentic capabilities, working entirely locally on consumer-grade hardware (CPU and GPU).
+[LocalAGI](https://github.com/mudler/LocalAGI) is a powerful, self-hostable AI Agent platform designed for maximum privacy and flexibility which encompassess and uses all the softwre stack. It provides a complete drop-in replacement for OpenAI's Responses APIs with advanced agentic capabilities, working entirely locally on consumer-grade hardware (CPU and GPU).
 
 ### Quick Start
@@ -8,8 +8,8 @@ import (
 
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/system"
 	"github.com/mudler/LocalAI/pkg/downloader"
-	"github.com/mudler/LocalAI/pkg/system"
 	"github.com/rs/zerolog/log"
 )
@@ -10,8 +10,8 @@ import (
 
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/system"
 	"github.com/mudler/LocalAI/pkg/downloader"
-	"github.com/mudler/LocalAI/pkg/system"
 	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v2"
@@ -6,7 +6,7 @@ import (
 	"path/filepath"
 
 	"github.com/mudler/LocalAI/core/config"
-	. "github.com/mudler/LocalAI/core/startup"
+	. "github.com/mudler/LocalAI/pkg/startup"
 
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@@ -3,8 +3,8 @@ package templates_test
 import (
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"
-	. "github.com/mudler/LocalAI/core/templates"
 	"github.com/mudler/LocalAI/pkg/functions"
+	. "github.com/mudler/LocalAI/pkg/templates"
 
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@@ -1,7 +1,7 @@
 package templates_test
 
 import (
-	. "github.com/mudler/LocalAI/core/templates" // Update with your module path
+	. "github.com/mudler/LocalAI/pkg/templates" // Update with your module path
 
 	// Update with your module path
 	. "github.com/onsi/ginkgo/v2"