Compare commits

..

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
894a30296a feat: unify and propagate CMAKE_ARGS to GGML-based backends
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-11 22:02:58 +01:00
111 changed files with 516 additions and 4359 deletions

9
.env
View File

@@ -82,15 +82,6 @@
# Enable to allow p2p mode # Enable to allow p2p mode
# LOCALAI_P2P=true # LOCALAI_P2P=true
# Enable to use federated mode
# LOCALAI_FEDERATED=true
# Enable to start federation server
# FEDERATED_SERVER=true
# Define to use federation token
# TOKEN=""
### Watchdog settings ### Watchdog settings
### ###
# Enables watchdog to kill backends that are inactive for too much time # Enables watchdog to kill backends that are inactive for too much time

4
.github/labeler.yml vendored
View File

@@ -5,10 +5,6 @@ dependencies:
- any: - any:
- changed-files: - changed-files:
- any-glob-to-any-file: 'Makefile' - any-glob-to-any-file: 'Makefile'
- changed-files:
- any-glob-to-any-file: '*.mod'
- changed-files:
- any-glob-to-any-file: '*.sum'
kind/documentation: kind/documentation:
- any: - any:

View File

@@ -280,7 +280,6 @@ jobs:
makeflags: ${{ matrix.makeflags }} makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }} latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }} latest-image-aio: ${{ matrix.latest-image-aio }}
skip-drivers: ${{ matrix.skip-drivers }}
secrets: secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -302,7 +301,6 @@ jobs:
latest-image: 'latest-cpu' latest-image: 'latest-cpu'
latest-image-aio: 'latest-aio-cpu' latest-image-aio: 'latest-aio-cpu'
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "11" cuda-major-version: "11"
cuda-minor-version: "7" cuda-minor-version: "7"
@@ -314,7 +312,6 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "0" cuda-minor-version: "0"
@@ -326,7 +323,6 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "11" cuda-major-version: "11"
cuda-minor-version: "7" cuda-minor-version: "7"
@@ -338,7 +334,6 @@ jobs:
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "0" cuda-minor-version: "0"
@@ -349,7 +344,6 @@ jobs:
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
- build-type: 'vulkan' - build-type: 'vulkan'
platforms: 'linux/amd64' platforms: 'linux/amd64'
@@ -360,45 +354,4 @@ jobs:
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target" makeflags: "--jobs=4 --output-sync=target"
# parallel-builds:
# uses: ./.github/workflows/image_build.yml
# with:
# tag-latest: ${{ matrix.tag-latest }}
# tag-suffix: ${{ matrix.tag-suffix }}
# ffmpeg: ${{ matrix.ffmpeg }}
# image-type: ${{ matrix.image-type }}
# build-type: ${{ matrix.build-type }}
# cuda-major-version: ${{ matrix.cuda-major-version }}
# cuda-minor-version: ${{ matrix.cuda-minor-version }}
# platforms: ${{ matrix.platforms }}
# runs-on: ${{ matrix.runs-on }}
# aio: ${{ matrix.aio }}
# base-image: ${{ matrix.base-image }}
# grpc-base-image: ${{ matrix.grpc-base-image }}
# makeflags: ${{ matrix.makeflags }}
# latest-image: ${{ matrix.latest-image }}
# latest-image-aio: ${{ matrix.latest-image-aio }}
# skip-drivers: ${{ matrix.skip-drivers }}
# secrets:
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
# strategy:
# matrix:
# include:
# - build-type: 'cublas'
# cuda-major-version: "12"
# cuda-minor-version: "0"
# platforms: 'linux/arm64'
# tag-latest: 'false'
# tag-suffix: '-nvidia-l4t-arm64-core'
# latest-image: 'latest-nvidia-l4t-arm64-core'
# ffmpeg: 'true'
# image-type: 'core'
# base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
# runs-on: 'self-hosted'
# makeflags: "--jobs=4 --output-sync=target"
# skip-drivers: 'true'

View File

@@ -49,10 +49,6 @@ on:
description: 'FFMPEG' description: 'FFMPEG'
default: '' default: ''
type: string type: string
skip-drivers:
description: 'Skip drivers by default'
default: 'false'
type: string
image-type: image-type:
description: 'Image type' description: 'Image type'
default: '' default: ''
@@ -238,7 +234,6 @@ jobs:
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0 GRPC_VERSION=v1.65.0
MAKEFLAGS=${{ inputs.makeflags }} MAKEFLAGS=${{ inputs.makeflags }}
SKIP_DRIVERS=${{ inputs.skip-drivers }}
context: . context: .
file: ./Dockerfile file: ./Dockerfile
cache-from: type=gha cache-from: type=gha
@@ -267,7 +262,6 @@ jobs:
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.65.0 GRPC_VERSION=v1.65.0
MAKEFLAGS=${{ inputs.makeflags }} MAKEFLAGS=${{ inputs.makeflags }}
SKIP_DRIVERS=${{ inputs.skip-drivers }}
context: . context: .
file: ./Dockerfile file: ./Dockerfile
cache-from: type=gha cache-from: type=gha

View File

@@ -18,7 +18,7 @@ jobs:
if: ${{ github.actor != 'dependabot[bot]' }} if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner - name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }} if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@v2.22.0 uses: securego/gosec@v2.21.4
with: with:
# we let the report trigger content trigger a failure using the GitHub Security features. # we let the report trigger content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...' args: '-no-fail -fmt sarif -out results.sarif ./...'

View File

@@ -115,13 +115,12 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
ARG BUILD_TYPE ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12 ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=0 ARG CUDA_MINOR_VERSION=0
ARG SKIP_DRIVERS=false
ENV BUILD_TYPE=${BUILD_TYPE} ENV BUILD_TYPE=${BUILD_TYPE}
# Vulkan requirements # Vulkan requirements
RUN <<EOT bash RUN <<EOT bash
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then if [ "${BUILD_TYPE}" = "vulkan" ]; then
apt-get update && \ apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \ software-properties-common pciutils wget gpg-agent && \
@@ -137,7 +136,7 @@ EOT
# CuBLAS requirements # CuBLAS requirements
RUN <<EOT bash RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then if [ "${BUILD_TYPE}" = "cublas" ]; then
apt-get update && \ apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
software-properties-common pciutils software-properties-common pciutils
@@ -163,7 +162,7 @@ RUN <<EOT bash
EOT EOT
# If we are building with clblas support, we need the libraries for the builds # If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
apt-get update && \ apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
libclblast-dev && \ libclblast-dev && \
@@ -171,7 +170,7 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
rm -rf /var/lib/apt/lists/* \ rm -rf /var/lib/apt/lists/* \
; fi ; fi
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \ RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
apt-get update && \ apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
hipblas-dev \ hipblas-dev \

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions # llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=504af20ee4eae72080a56d59d744f6774f7901ce CPPLLAMA_VERSION?=dafae66cc242eb766797194d3c85c5e502625623
# whisper.cpp version # whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
@@ -32,7 +32,7 @@ BARKCPP_VERSION?=v1.0.0
# stablediffusion.cpp (ggml) # stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a STABLEDIFFUSION_GGML_VERSION?=9578fdcc4632dc3de5565f28e2fb16b7c18f8d48
ONNX_VERSION?=1.20.0 ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64 ONNX_ARCH?=x64
@@ -40,6 +40,7 @@ ONNX_OS?=linux
export BUILD_TYPE?= export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE) export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export GGML_CMAKE_ARGS?=
export CMAKE_ARGS?= export CMAKE_ARGS?=
export BACKEND_LIBS?= export BACKEND_LIBS?=
@@ -88,9 +89,45 @@ ifndef UNAME_S
UNAME_S := $(shell uname -s) UNAME_S := $(shell uname -s)
endif endif
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS # IF native is false, we add -DGGML_NATIVE=OFF to GGML_CMAKE_ARGS
ifeq ($(NATIVE),false) ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF GGML_CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
GGML_CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
# If build type is cublas, then we set -DGGML_CUDA=ON to GGML_CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
GGML_CMAKE_ARGS+=-DGGML_CUDA=ON
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to GGML_CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
GGML_CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
GGML_CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
GGML_CMAKE_ARGS+=-DGGML_HIP=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
GGML_CMAKE_ARGS+=-DGGML_METAL=OFF
else
GGML_CMAKE_ARGS+=-DGGML_METAL=ON
GGML_CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
TARGET+=--target ggml-metal
endif
endif
ifeq ($(BUILD_TYPE),sycl_f16)
GGML_CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),sycl_f32)
GGML_CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif endif
# Detect if we are running on arm64 # Detect if we are running on arm64
@@ -117,7 +154,7 @@ ifeq ($(OS),Darwin)
BUILD_TYPE=metal BUILD_TYPE=metal
# disable metal if on Darwin and any other value is explicitly passed. # disable metal if on Darwin and any other value is explicitly passed.
else ifneq ($(BUILD_TYPE),metal) else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF GGML_CMAKE_ARGS+=-DGGML_METAL=OFF
export GGML_NO_ACCELERATE=1 export GGML_NO_ACCELERATE=1
export GGML_NO_METAL=1 export GGML_NO_METAL=1
endif endif
@@ -142,7 +179,7 @@ ifeq ($(BUILD_TYPE),cublas)
endif endif
ifeq ($(BUILD_TYPE),vulkan) ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1 GGML_CMAKE_ARGS+=-DGGML_VULKAN=1
endif endif
ifneq (,$(findstring sycl,$(BUILD_TYPE))) ifneq (,$(findstring sycl,$(BUILD_TYPE)))
@@ -164,7 +201,7 @@ ifeq ($(BUILD_TYPE),hipblas)
export GGML_HIP=1 export GGML_HIP=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101 GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)" AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)" GGML_CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
endif endif
@@ -235,6 +272,8 @@ ifeq ($(BUILD_API_ONLY),true)
GRPC_BACKENDS= GRPC_BACKENDS=
endif endif
export CMAKE_ARGS?=$(GGML_CMAKE_ARGS)
.PHONY: all test build vendor get-sources prepare-sources prepare .PHONY: all test build vendor get-sources prepare-sources prepare
all: help all: help
@@ -302,8 +341,14 @@ sources/stablediffusion-ggml.cpp:
git checkout $(STABLEDIFFUSION_GGML_VERSION) && \ git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch git submodule update --init --recursive --depth 1 --single-branch
backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a: sources/stablediffusion-ggml.cpp
$(MAKE) -C backend/go/image/stablediffusion-ggml build/libstable-diffusion.a cd sources/stablediffusion-ggml.cpp && \
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) .. && \
cmake --build . --config Release
backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a $(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc

View File

@@ -126,10 +126,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
## 🚀 [Features](https://localai.io/features/) ## 🚀 [Features](https://localai.io/features/)
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table)) - 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/) - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`) - 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
- 🎨 [Image generation](https://localai.io/features/image-generation) - 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/) - 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/) - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/) - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
@@ -137,7 +137,6 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
- 🥽 [Vision API](https://localai.io/features/gpt-vision/) - 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 📈 [Reranker API](https://localai.io/features/reranker/) - 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/) - 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- 🔊 Voice activity detection (Silero-VAD support)
- 🌍 Integrated WebUI! - 🌍 Integrated WebUI!
## 💻 Usage ## 💻 Usage
@@ -160,7 +159,6 @@ Model galleries
Other: Other:
- Helm chart https://github.com/go-skynet/helm-charts - Helm chart https://github.com/go-skynet/helm-charts
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin - VSCode extension https://github.com/badgooooor/localai-vscode-plugin
- Langchain: https://python.langchain.com/docs/integrations/providers/localai/
- Terminal utility https://github.com/djcopley/ShellOracle - Terminal utility https://github.com/djcopley/ShellOracle
- Local Smart assistant https://github.com/mudler/LocalAGI - Local Smart assistant https://github.com/mudler/LocalAGI
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision - Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision

View File

@@ -159,7 +159,6 @@ message Reply {
bytes message = 1; bytes message = 1;
int32 tokens = 2; int32 tokens = 2;
int32 prompt_tokens = 3; int32 prompt_tokens = 3;
bytes audio = 5;
} }
message ModelOptions { message ModelOptions {

View File

@@ -7,42 +7,6 @@ BUILD_TYPE?=
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server TARGET?=--target grpc-server
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DGGML_CUDA=ON
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DGGML_HIP=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
else
CMAKE_ARGS+=-DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
TARGET+=--target ggml-metal
endif
endif
ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif
llama.cpp: llama.cpp:
mkdir -p llama.cpp mkdir -p llama.cpp
cd llama.cpp && \ cd llama.cpp && \
@@ -70,7 +34,7 @@ clean: purge
rm -rf llama.cpp rm -rf llama.cpp
grpc-server: llama.cpp llama.cpp/examples/grpc-server grpc-server: llama.cpp llama.cpp/examples/grpc-server
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)" @echo "Building grpc-server for llama.cpp with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE))) ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \ +bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)" cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"

View File

@@ -428,7 +428,6 @@ struct llama_server_context
{ {
llama_model *model = nullptr; llama_model *model = nullptr;
llama_context *ctx = nullptr; llama_context *ctx = nullptr;
const llama_vocab * vocab = nullptr;
clip_ctx *clp_ctx = nullptr; clip_ctx *clp_ctx = nullptr;
@@ -440,7 +439,6 @@ struct llama_server_context
bool clean_kv_cache = true; bool clean_kv_cache = true;
bool all_slots_are_idle = false; bool all_slots_are_idle = false;
bool add_bos_token = true; bool add_bos_token = true;
bool has_eos_token = true;
int32_t n_ctx; // total context for all clients / slots int32_t n_ctx; // total context for all clients / slots
@@ -494,8 +492,8 @@ struct llama_server_context
} }
common_init_result common_init = common_init_from_params(params); common_init_result common_init = common_init_from_params(params);
model = common_init.model.release(); model = common_init.model;
ctx = common_init.context.release(); ctx = common_init.context;
if (model == nullptr) if (model == nullptr)
{ {
LOG_ERR("unable to load model: %s", params.model.c_str()); LOG_ERR("unable to load model: %s", params.model.c_str());
@@ -504,7 +502,7 @@ struct llama_server_context
if (multimodal) { if (multimodal) {
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx); const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
const int n_embd_llm = llama_model_n_embd(model); const int n_embd_llm = llama_n_embd(model);
if (n_embd_clip != n_embd_llm) { if (n_embd_clip != n_embd_llm) {
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm); LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
llama_free(ctx); llama_free(ctx);
@@ -513,15 +511,23 @@ struct llama_server_context
} }
} }
vocab = llama_model_get_vocab(model);
n_ctx = llama_n_ctx(ctx); n_ctx = llama_n_ctx(ctx);
add_bos_token = llama_vocab_get_add_bos(vocab); add_bos_token = llama_add_bos_token(model);
has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
return true; return true;
} }
void validate_model_chat_template(server_params & sparams) {
llama_chat_message chat[] = {{"user", "test"}};
std::vector<char> buf(1);
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
if (res < 0) {
LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__);
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
}
}
llama_client_slot* get_active_slot() { llama_client_slot* get_active_slot() {
for (llama_client_slot& slot : slots) { for (llama_client_slot& slot : slots) {
// Check if the slot is currently processing // Check if the slot is currently processing
@@ -675,6 +681,7 @@ struct llama_server_context
slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat); slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau); slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
slot->sparams.seed = json_value(data, "seed", default_sparams.seed); slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
@@ -719,8 +726,8 @@ struct llama_server_context
slot->prompt = ""; slot->prompt = "";
} }
if (json_value(data, "ignore_eos", false) && has_eos_token) { if (json_value(data, "ignore_eos", false)) {
slot->sparams.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY}); slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY});
} }
/* /*
slot->sparams.penalty_prompt_tokens.clear(); slot->sparams.penalty_prompt_tokens.clear();
@@ -759,13 +766,13 @@ struct llama_server_context
} }
} }
*/ */
slot->sparams.logit_bias.clear(); slot->sparams.logit_bias.clear();
const auto &logit_bias = data.find("logit_bias"); const auto &logit_bias = data.find("logit_bias");
if (logit_bias != data.end() && logit_bias->is_array()) if (logit_bias != data.end() && logit_bias->is_array())
{ {
const llama_vocab * vocab = llama_model_get_vocab(model); const int n_vocab = llama_n_vocab(model);
const int n_vocab = llama_vocab_n_tokens(vocab);
for (const auto &el : *logit_bias) for (const auto &el : *logit_bias)
{ {
if (el.is_array() && el.size() == 2) if (el.is_array() && el.size() == 2)
@@ -794,7 +801,7 @@ struct llama_server_context
} }
else if (el[0].is_string()) else if (el[0].is_string())
{ {
auto toks = common_tokenize(vocab, el[0].get<std::string>(), false); auto toks = common_tokenize(model, el[0].get<std::string>(), false);
for (auto tok : toks) for (auto tok : toks)
{ {
slot->sparams.logit_bias.push_back({tok, bias}); slot->sparams.logit_bias.push_back({tok, bias});
@@ -1124,7 +1131,7 @@ struct llama_server_context
slot.has_next_token = false; slot.has_next_token = false;
} }
if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok)) if (result.tok == llama_token_eos(model))
{ {
slot.stopped_eos = true; slot.stopped_eos = true;
slot.has_next_token = false; slot.has_next_token = false;
@@ -1206,12 +1213,13 @@ struct llama_server_context
{"mirostat", slot.sparams.mirostat}, {"mirostat", slot.sparams.mirostat},
{"mirostat_tau", slot.sparams.mirostat_tau}, {"mirostat_tau", slot.sparams.mirostat_tau},
{"mirostat_eta", slot.sparams.mirostat_eta}, {"mirostat_eta", slot.sparams.mirostat_eta},
{"penalize_nl", slot.sparams.penalize_nl},
{"stop", slot.params.antiprompt}, {"stop", slot.params.antiprompt},
{"n_predict", slot.params.n_predict}, {"n_predict", slot.params.n_predict},
{"n_keep", params.n_keep}, {"n_keep", params.n_keep},
{"ignore_eos", slot.sparams.ignore_eos}, {"ignore_eos", slot.sparams.ignore_eos},
{"stream", slot.params.stream}, {"stream", slot.params.stream},
// {"logit_bias", slot.sparams.logit_bias}, // {"logit_bias", slot.sparams.logit_bias},
{"n_probs", slot.sparams.n_probs}, {"n_probs", slot.sparams.n_probs},
{"min_keep", slot.sparams.min_keep}, {"min_keep", slot.sparams.min_keep},
{"grammar", slot.sparams.grammar}, {"grammar", slot.sparams.grammar},
@@ -1319,7 +1327,7 @@ struct llama_server_context
res.error = false; res.error = false;
res.stop = true; res.stop = true;
const int n_embd = llama_model_n_embd(model); const int n_embd = llama_n_embd(model);
if (!params.embedding) if (!params.embedding)
{ {
LOG_WARNING("embedding disabled", { LOG_WARNING("embedding disabled", {
@@ -1418,7 +1426,7 @@ struct llama_server_context
n_eval = n_batch; n_eval = n_batch;
} }
const int n_embd = llama_model_n_embd(model); const int n_embd = llama_n_embd(model);
float * embd = img.image_embedding + i * n_embd; float * embd = img.image_embedding + i * n_embd;
llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0); llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
if (llama_decode(ctx, llava_batch.batch)) if (llama_decode(ctx, llava_batch.batch))
@@ -1699,11 +1707,11 @@ struct llama_server_context
suffix_tokens.erase(suffix_tokens.begin()); suffix_tokens.erase(suffix_tokens.begin());
} }
prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_fim_pre(vocab)); prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model));
prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_bos(vocab)); // always add BOS prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS
prefix_tokens.insert(prefix_tokens.end(), llama_vocab_fim_suf(vocab)); prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model));
prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end()); prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
prefix_tokens.push_back(llama_vocab_fim_mid(vocab)); prefix_tokens.push_back(llama_token_middle(model));
prompt_tokens = prefix_tokens; prompt_tokens = prefix_tokens;
} }
else else
@@ -2104,6 +2112,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat); // slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
// slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau); // slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
// slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta); // slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
// slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
// slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep); // slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
// slot->params.seed = json_value(data, "seed", default_params.seed); // slot->params.seed = json_value(data, "seed", default_params.seed);
// slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar); // slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
@@ -2126,6 +2135,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["mirostat"] = predict->mirostat(); data["mirostat"] = predict->mirostat();
data["mirostat_tau"] = predict->mirostattau(); data["mirostat_tau"] = predict->mirostattau();
data["mirostat_eta"] = predict->mirostateta(); data["mirostat_eta"] = predict->mirostateta();
data["penalize_nl"] = predict->penalizenl();
data["n_keep"] = predict->nkeep(); data["n_keep"] = predict->nkeep();
data["seed"] = predict->seed(); data["seed"] = predict->seed();
data["grammar"] = predict->grammar(); data["grammar"] = predict->grammar();
@@ -2171,6 +2181,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// llama.params.sparams.mirostat = predict->mirostat(); // llama.params.sparams.mirostat = predict->mirostat();
// llama.params.sparams.mirostat_tau = predict->mirostattau(); // llama.params.sparams.mirostat_tau = predict->mirostattau();
// llama.params.sparams.mirostat_eta = predict->mirostateta(); // llama.params.sparams.mirostat_eta = predict->mirostateta();
// llama.params.sparams.penalize_nl = predict->penalizenl();
// llama.params.n_keep = predict->nkeep(); // llama.params.n_keep = predict->nkeep();
// llama.params.seed = predict->seed(); // llama.params.seed = predict->seed();
// llama.params.sparams.grammar = predict->grammar(); // llama.params.sparams.grammar = predict->grammar();
@@ -2217,35 +2228,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// } // }
// } // }
const std::vector<ggml_type> kv_cache_types = {
GGML_TYPE_F32,
GGML_TYPE_F16,
GGML_TYPE_BF16,
GGML_TYPE_Q8_0,
GGML_TYPE_Q4_0,
GGML_TYPE_Q4_1,
GGML_TYPE_IQ4_NL,
GGML_TYPE_Q5_0,
GGML_TYPE_Q5_1,
};
static ggml_type kv_cache_type_from_str(const std::string & s) {
for (const auto & type : kv_cache_types) {
if (ggml_type_name(type) == s) {
return type;
}
}
throw std::runtime_error("Unsupported cache type: " + s);
}
static std::string get_all_kv_cache_types() {
std::ostringstream msg;
for (const auto & type : kv_cache_types) {
msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? "" : ", ");
}
return msg.str();
}
static void params_parse(const backend::ModelOptions* request, static void params_parse(const backend::ModelOptions* request,
common_params & params) { common_params & params) {
@@ -2260,10 +2242,10 @@ static void params_parse(const backend::ModelOptions* request,
// params.model_alias ?? // params.model_alias ??
params.model_alias = request->modelfile(); params.model_alias = request->modelfile();
if (!request->cachetypekey().empty()) { if (!request->cachetypekey().empty()) {
params.cache_type_k = kv_cache_type_from_str(request->cachetypekey()); params.cache_type_k = request->cachetypekey();
} }
if (!request->cachetypevalue().empty()) { if (!request->cachetypevalue().empty()) {
params.cache_type_v = kv_cache_type_from_str(request->cachetypevalue()); params.cache_type_v = request->cachetypevalue();
} }
params.n_ctx = request->contextsize(); params.n_ctx = request->contextsize();
//params.memory_f16 = request->f16memory(); //params.memory_f16 = request->f16memory();

View File

@@ -1,13 +1,13 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 3cd0d2fa..6c5e811a 100644 index 342042ff..224db9b5 100644
--- a/examples/llava/clip.cpp --- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp
@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima @@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches"); struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
int* patches_data = (int*)malloc(ggml_nbytes(patches)); int* patches_data = (int*)malloc(ggml_nbytes(patches));
for (int i = 0; i < num_patches; i++) { for (int i = 0; i < num_patches; i++) {
- patches_data[i] = i + 1; - patches_data[i] = i + 1;
+ patches_data[i] = i; + patches_data[i] = i;
} }
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches)); ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
free(patches_data); free(patches_data);

View File

@@ -2,95 +2,20 @@ INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./) LIBRARY_PATH := $(abspath ./)
AR?=ar AR?=ar
CMAKE_ARGS?=
BUILD_TYPE?= BUILD_TYPE?=
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
# keep standard at C11 and C++11 # keep standard at C11 and C++11
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DGGML_CUDA=ON
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DGGML_HIP=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
else
CMAKE_ARGS+=-DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
TARGET+=--target ggml-metal
endif
endif
# ifeq ($(BUILD_TYPE),sycl_f16)
# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON
# endif
# ifeq ($(BUILD_TYPE),sycl_f32)
# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON
# endif
# warnings # warnings
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
# Find all .a archives in ARCHIVE_DIR
# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
GGML_ARCHIVE_DIR := build/ggml/src/
ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a')
# Name of the single merged library
COMBINED_LIB := libggmlall.a
# Rule to merge all the .a files into one
$(COMBINED_LIB): $(ALL_ARCHIVES)
@echo "Merging all .a into $(COMBINED_LIB)"
rm -f $@
mkdir -p merge-tmp
for a in $(ALL_ARCHIVES); do \
( cd merge-tmp && ar x ../$$a ); \
done
( cd merge-tmp && ar rcs ../$@ *.o )
# Ensure we have a proper index
ranlib $@
# Clean up
rm -rf merge-tmp
build/libstable-diffusion.a:
@echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
cmake --build . --config Release"
else
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
cmake --build . --config Release
endif
$(MAKE) $(COMBINED_LIB)
gosd.o: gosd.o:
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c $(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
libsd.a: gosd.o libsd.a: gosd.o
cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a cp $(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a ./libsd.a
$(AR) rcs libsd.a gosd.o $(AR) rcs libsd.a gosd.o
clean: clean:
rm -rf gosd.o libsd.a build $(COMBINED_LIB) rm -f gosd.o libsd.a

View File

@@ -1,7 +1,7 @@
package main package main
// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include // #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp // #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src/ggml-cpu -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src -lsd -lstdc++ -lm -lggml -lggml-base -lggml-cpu -lgomp
// #include <gosd.h> // #include <gosd.h>
// #include <stdlib.h> // #include <stdlib.h>
import "C" import "C"

View File

@@ -21,8 +21,8 @@ func (vad *VAD) Load(opts *pb.ModelOptions) error {
SampleRate: 16000, SampleRate: 16000,
//WindowSize: 1024, //WindowSize: 1024,
Threshold: 0.5, Threshold: 0.5,
MinSilenceDurationMs: 100, MinSilenceDurationMs: 0,
SpeechPadMs: 30, SpeechPadMs: 0,
}) })
if err != nil { if err != nil {
return fmt.Errorf("create silero detector: %w", err) return fmt.Errorf("create silero detector: %w", err)

View File

@@ -1,6 +1,5 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu intel-extension-for-pytorch
torch==2.3.1+cxx11.abi torch
oneccl_bind_pt==2.3.100+xpu
optimum[openvino] optimum[openvino]
setuptools setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate accelerate
auto-gptq==0.7.1 auto-gptq==0.7.1
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
certifi certifi
transformers transformers

View File

@@ -1,9 +1,8 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu intel-extension-for-pytorch
torch==2.3.1+cxx11.abi torch
torchaudio==2.3.1+cxx11.abi torchaudio
oneccl_bind_pt==2.3.100+xpu
optimum[openvino] optimum[openvino]
setuptools setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers transformers
accelerate accelerate

View File

@@ -1,4 +1,4 @@
bark==0.1.5 bark==0.1.5
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
certifi certifi

View File

@@ -17,9 +17,6 @@
# LIMIT_TARGETS="cublas12" # LIMIT_TARGETS="cublas12"
# source $(dirname $0)/../common/libbackend.sh # source $(dirname $0)/../common/libbackend.sh
# #
PYTHON_VERSION="3.10"
function init() { function init() {
# Name of the backend (directory name) # Name of the backend (directory name)
BACKEND_NAME=${PWD##*/} BACKEND_NAME=${PWD##*/}
@@ -91,7 +88,7 @@ function getBuildProfile() {
# always result in an activated virtual environment # always result in an activated virtual environment
function ensureVenv() { function ensureVenv() {
if [ ! -d "${EDIR}/venv" ]; then if [ ! -d "${EDIR}/venv" ]; then
uv venv --python ${PYTHON_VERSION} ${EDIR}/venv uv venv ${EDIR}/venv
echo "virtualenv created" echo "virtualenv created"
fi fi

View File

@@ -1,5 +1,4 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu intel-extension-for-pytorch
torch==2.3.1+cxx11.abi torch
oneccl_bind_pt==2.3.100+xpu
optimum[openvino] optimum[openvino]

View File

@@ -1,3 +1,3 @@
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
grpcio-tools grpcio-tools

View File

@@ -1,10 +1,9 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu intel-extension-for-pytorch
torch==2.3.1+cxx11.abi torch
torchaudio==2.3.1+cxx11.abi torchaudio
oneccl_bind_pt==2.3.100+xpu
optimum[openvino] optimum[openvino]
setuptools setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers transformers
accelerate accelerate
coqui-tts coqui-tts

View File

@@ -1,4 +1,4 @@
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
certifi certifi
packaging==24.1 packaging==24.1

View File

@@ -17,7 +17,7 @@ import backend_pb2_grpc
import grpc import grpc
from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \ from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers.pipelines.stable_diffusion import safety_checker from diffusers.pipelines.stable_diffusion import safety_checker
@@ -275,13 +275,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.LowVRAM: if request.LowVRAM:
self.pipe.enable_model_cpu_offload() self.pipe.enable_model_cpu_offload()
elif request.PipelineType == "SanaPipeline":
self.pipe = SanaPipeline.from_pretrained(
request.Model,
variant="bf16",
torch_dtype=torch.bfloat16)
self.pipe.vae.to(torch.bfloat16)
self.pipe.text_encoder.to(torch.bfloat16)
if CLIPSKIP and request.CLIPSkip != 0: if CLIPSKIP and request.CLIPSkip != 0:
self.clip_skip = request.CLIPSkip self.clip_skip = request.CLIPSkip

View File

@@ -1,10 +1,9 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu intel-extension-for-pytorch
torch==2.3.1+cxx11.abi torch
torchvision==0.18.1+cxx11.abi torchvision
oneccl_bind_pt==2.3.100+xpu
optimum[openvino] optimum[openvino]
setuptools setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
diffusers diffusers
opencv-python opencv-python
transformers transformers

View File

@@ -1,5 +1,5 @@
setuptools setuptools
grpcio==1.69.0 grpcio==1.68.1
pillow pillow
protobuf protobuf
certifi certifi

View File

@@ -1,4 +1,4 @@
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
certifi certifi
wheel wheel

View File

@@ -1,3 +1,3 @@
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
certifi certifi

View File

@@ -1,7 +1,3 @@
torch==2.4.1 torch==2.4.1
git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2

View File

@@ -2,7 +2,3 @@
torch==2.4.1+cu118 torch==2.4.1+cu118
git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2

View File

@@ -1,7 +1,3 @@
torch==2.4.1 torch==2.4.1
git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2

View File

@@ -2,7 +2,3 @@
torch==2.4.1+rocm6.0 torch==2.4.1+rocm6.0
git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2

View File

@@ -1,15 +1,14 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu intel-extension-for-pytorch
torch==2.3.1+cxx11.abi torch
torchaudio==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino] optimum[openvino]
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
librosa==0.9.1 librosa==0.9.1
faster-whisper==0.9.0 faster-whisper==0.9.0
pydub==0.25.1 pydub==0.25.1
wavmark==0.0.3 wavmark==0.0.3
numpy==1.22.0
eng_to_ipa==0.0.2 eng_to_ipa==0.0.2
inflect==7.0.0 inflect==7.0.0
unidecode==1.3.7 unidecode==1.3.7

View File

@@ -1,17 +1,20 @@
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
librosa librosa
faster-whisper faster-whisper
pydub==0.25.1
wavmark==0.0.3
numpy==1.22.0
eng_to_ipa==0.0.2
inflect inflect
unidecode unidecode
whisper-timestamped
openai openai
python-dotenv python-dotenv
pypinyin pypinyin
cn2an==0.5.22 cn2an==0.5.22
numpy==1.22.0
networkx==2.8.8 networkx==2.8.8
jieba==0.42.1 jieba==0.42.1
gradio==5.9.1 gradio==3.48.0
langid==1.1.6 langid==1.1.6
llvmlite==0.43.0 llvmlite==0.43.0
setuptools

View File

@@ -1,8 +1,8 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu intel-extension-for-pytorch
torch==2.3.1+cxx11.abi torch
torchaudio==2.3.1+cxx11.abi torchaudio
oneccl_bind_pt==2.3.100+xpu
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers transformers
accelerate accelerate

View File

@@ -1,4 +1,3 @@
grpcio==1.69.0 grpcio==1.68.1
certifi certifi
llvmlite==0.43.0 llvmlite==0.43.0
setuptools

View File

@@ -1,9 +1,8 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu intel-extension-for-pytorch
transformers transformers
accelerate accelerate
torch==2.3.1+cxx11.abi torch
oneccl_bind_pt==2.3.100+xpu
rerankers[transformers] rerankers[transformers]
optimum[openvino] optimum[openvino]
setuptools setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,3 +1,3 @@
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
certifi certifi

View File

@@ -1,9 +1,8 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu intel-extension-for-pytorch
torch==2.3.1+cxx11.abi torch
oneccl_bind_pt==2.3.100+xpu
optimum[openvino] optimum[openvino]
setuptools setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
accelerate accelerate
sentence-transformers==3.3.1 sentence-transformers==3.3.1
transformers transformers

View File

@@ -1,4 +1,4 @@
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
certifi certifi
datasets datasets

View File

@@ -1,8 +1,7 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu intel-extension-for-pytorch
transformers transformers
oneccl_bind_pt==2.3.100+xpu
accelerate accelerate
torch==2.3.1+cxx11.abi torch
optimum[openvino] optimum[openvino]
setuptools setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,4 +1,4 @@
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
scipy==1.14.0 scipy==1.14.0
certifi certifi

View File

@@ -1,7 +1,6 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu intel-extension-for-pytorch
torch==2.3.1+cxx11.abi torch
oneccl_bind_pt==2.3.100+xpu
optimum[openvino] optimum[openvino]
intel-extension-for-transformers intel-extension-for-transformers
bitsandbytes bitsandbytes

View File

@@ -1,4 +1,4 @@
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
certifi certifi
setuptools setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,7 +1,7 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu intel-extension-for-pytorch
accelerate accelerate
torch==2.3.1+cxx11.abi torch
torchaudio==2.3.1+cxx11.abi torchaudio
optimum[openvino] optimum[openvino]
oneccl_bind_pt==2.3.100+xpu setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,4 +1,3 @@
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
certifi certifi
setuptools

View File

@@ -1,9 +1,8 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu intel-extension-for-pytorch
accelerate accelerate
torch==2.3.1+cxx11.abi torch
transformers transformers
optimum[openvino] optimum[openvino]
setuptools setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
bitsandbytes bitsandbytes
oneccl_bind_pt==2.3.100+xpu

View File

@@ -1,4 +1,4 @@
grpcio==1.69.0 grpcio==1.68.1
protobuf protobuf
certifi certifi
setuptools setuptools

View File

@@ -22,9 +22,8 @@ import (
) )
type LLMResponse struct { type LLMResponse struct {
Response string // should this be []byte? Response string // should this be []byte?
Usage TokenUsage Usage TokenUsage
AudioOutput string
} }
type TokenUsage struct { type TokenUsage struct {
@@ -118,12 +117,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
ss := "" ss := ""
var partialRune []byte var partialRune []byte
err := inferenceModel.PredictStream(ctx, opts, func(reply *proto.Reply) { err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
msg := reply.Message partialRune = append(partialRune, chars...)
partialRune = append(partialRune, msg...)
tokenUsage.Prompt = int(reply.PromptTokens)
tokenUsage.Completion = int(reply.Tokens)
for len(partialRune) > 0 { for len(partialRune) > 0 {
r, size := utf8.DecodeRune(partialRune) r, size := utf8.DecodeRune(partialRune)
@@ -137,10 +132,6 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
partialRune = partialRune[size:] partialRune = partialRune[size:]
} }
if len(msg) == 0 {
tokenCallback("", tokenUsage)
}
}) })
return LLMResponse{ return LLMResponse{
Response: ss, Response: ss,

View File

@@ -38,7 +38,6 @@ type BackendConfig struct {
TemplateConfig TemplateConfig `yaml:"template"` TemplateConfig TemplateConfig `yaml:"template"`
KnownUsecaseStrings []string `yaml:"known_usecases"` KnownUsecaseStrings []string `yaml:"known_usecases"`
KnownUsecases *BackendConfigUsecases `yaml:"-"` KnownUsecases *BackendConfigUsecases `yaml:"-"`
Pipeline Pipeline `yaml:"pipeline"`
PromptStrings, InputStrings []string `yaml:"-"` PromptStrings, InputStrings []string `yaml:"-"`
InputToken [][]int `yaml:"-"` InputToken [][]int `yaml:"-"`
@@ -77,18 +76,6 @@ type BackendConfig struct {
Options []string `yaml:"options"` Options []string `yaml:"options"`
} }
// Pipeline defines other models to use for audio-to-audio
type Pipeline struct {
TTS string `yaml:"tts"`
LLM string `yaml:"llm"`
Transcription string `yaml:"transcription"`
VAD string `yaml:"vad"`
}
func (p Pipeline) IsNotConfigured() bool {
return p.LLM == "" || p.TTS == "" || p.Transcription == ""
}
type File struct { type File struct {
Filename string `yaml:"filename" json:"filename"` Filename string `yaml:"filename" json:"filename"`
SHA256 string `yaml:"sha256" json:"sha256"` SHA256 string `yaml:"sha256" json:"sha256"`

View File

@@ -7,7 +7,6 @@ import (
"net/http" "net/http"
"github.com/dave-gray101/v2keyauth" "github.com/dave-gray101/v2keyauth"
"github.com/gofiber/websocket/v2"
"github.com/mudler/LocalAI/pkg/utils" "github.com/mudler/LocalAI/pkg/utils"
"github.com/mudler/LocalAI/core/http/endpoints/localai" "github.com/mudler/LocalAI/core/http/endpoints/localai"
@@ -88,17 +87,6 @@ func API(application *application.Application) (*fiber.App, error) {
router := fiber.New(fiberCfg) router := fiber.New(fiberCfg)
router.Use(middleware.StripPathPrefix())
router.Use("/v1/realtime", func(c *fiber.Ctx) error {
if websocket.IsWebSocketUpgrade(c) {
// Returns true if the client requested upgrade to the WebSocket protocol
return c.Next()
}
return nil
})
router.Hooks().OnListen(func(listenData fiber.ListenData) error { router.Hooks().OnListen(func(listenData fiber.ListenData) error {
scheme := "http" scheme := "http"
if listenData.TLS { if listenData.TLS {

View File

@@ -237,31 +237,6 @@ func postInvalidRequest(url string) (error, int) {
return nil, resp.StatusCode return nil, resp.StatusCode
} }
func getRequest(url string, header http.Header) (error, int, []byte) {
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return err, -1, nil
}
req.Header = header
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return err, -1, nil
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return err, -1, nil
}
return nil, resp.StatusCode, body
}
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml` const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
//go:embed backend-assets/* //go:embed backend-assets/*
@@ -370,33 +345,6 @@ var _ = Describe("API test", func() {
}) })
}) })
Context("URL routing Tests", func() {
It("Should support reverse-proxy when unauthenticated", func() {
err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{
"X-Forwarded-Proto": {"https"},
"X-Forwarded-Host": {"example.org"},
"X-Forwarded-Prefix": {"/myprefix/"},
})
Expect(err).To(BeNil(), "error")
Expect(sc).To(Equal(401), "status code")
Expect(string(body)).To(ContainSubstring(`<base href="https://example.org/myprefix/" />`), "body")
})
It("Should support reverse-proxy when authenticated", func() {
err, sc, body := getRequest("http://127.0.0.1:9090/myprefix/", http.Header{
"Authorization": {bearerKey},
"X-Forwarded-Proto": {"https"},
"X-Forwarded-Host": {"example.org"},
"X-Forwarded-Prefix": {"/myprefix/"},
})
Expect(err).To(BeNil(), "error")
Expect(sc).To(Equal(200), "status code")
Expect(string(body)).To(ContainSubstring(`<base href="https://example.org/myprefix/" />`), "body")
})
})
Context("Applying models", func() { Context("Applying models", func() {
It("applies models from a gallery", func() { It("applies models from a gallery", func() {
@@ -756,7 +704,7 @@ var _ = Describe("API test", func() {
Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp)) Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat))) Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat)))
Expect(resp.Header.Get("Content-Type")).To(Or(Equal("audio/x-wav"), Equal("audio/vnd.wave"))) Expect(resp.Header.Get("Content-Type")).To(Equal("audio/x-wav"))
}) })
It("installs and is capable to generate images", Label("stablediffusion"), func() { It("installs and is capable to generate images", Label("stablediffusion"), func() {
if runtime.GOOS != "linux" { if runtime.GOOS != "linux" {

View File

@@ -19,11 +19,9 @@ func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *mo
if ctx.Params("model") != "" { if ctx.Params("model") != "" {
modelInput = ctx.Params("model") modelInput = ctx.Params("model")
} }
if ctx.Query("model") != "" { if ctx.Query("model") != "" {
modelInput = ctx.Query("model") modelInput = ctx.Query("model")
} }
// Set model from bearer token, if available // Set model from bearer token, if available
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // Reduced duplicate characters of Bearer bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // Reduced duplicate characters of Bearer
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer) bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)

View File

@@ -16,7 +16,7 @@ func installButton(galleryName string) elem.Node {
"class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong", "class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
"hx-swap": "outerHTML", "hx-swap": "outerHTML",
// post the Model ID as param // post the Model ID as param
"hx-post": "browse/install/model/" + galleryName, "hx-post": "/browse/install/model/" + galleryName,
}, },
elem.I( elem.I(
attrs.Props{ attrs.Props{
@@ -36,7 +36,7 @@ func reInstallButton(galleryName string) elem.Node {
"hx-target": "#action-div-" + dropBadChars(galleryName), "hx-target": "#action-div-" + dropBadChars(galleryName),
"hx-swap": "outerHTML", "hx-swap": "outerHTML",
// post the Model ID as param // post the Model ID as param
"hx-post": "browse/install/model/" + galleryName, "hx-post": "/browse/install/model/" + galleryName,
}, },
elem.I( elem.I(
attrs.Props{ attrs.Props{
@@ -80,7 +80,7 @@ func deleteButton(galleryID string) elem.Node {
"hx-target": "#action-div-" + dropBadChars(galleryID), "hx-target": "#action-div-" + dropBadChars(galleryID),
"hx-swap": "outerHTML", "hx-swap": "outerHTML",
// post the Model ID as param // post the Model ID as param
"hx-post": "browse/delete/model/" + galleryID, "hx-post": "/browse/delete/model/" + galleryID,
}, },
elem.I( elem.I(
attrs.Props{ attrs.Props{

View File

@@ -47,7 +47,7 @@ func searchableElement(text, icon string) elem.Node {
// "value": text, // "value": text,
//"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2", //"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
"href": "#!", "href": "#!",
"hx-post": "browse/search/models", "hx-post": "/browse/search/models",
"hx-target": "#search-results", "hx-target": "#search-results",
// TODO: this doesn't work // TODO: this doesn't work
// "hx-vals": `{ \"search\": \"` + text + `\" }`, // "hx-vals": `{ \"search\": \"` + text + `\" }`,

View File

@@ -64,7 +64,7 @@ func StartProgressBar(uid, progress, text string) string {
return elem.Div( return elem.Div(
attrs.Props{ attrs.Props{
"hx-trigger": "done", "hx-trigger": "done",
"hx-get": "browse/job/" + uid, "hx-get": "/browse/job/" + uid,
"hx-swap": "outerHTML", "hx-swap": "outerHTML",
"hx-target": "this", "hx-target": "this",
}, },
@@ -77,7 +77,7 @@ func StartProgressBar(uid, progress, text string) string {
}, },
elem.Text(bluemonday.StrictPolicy().Sanitize(text)), //Perhaps overly defensive elem.Text(bluemonday.StrictPolicy().Sanitize(text)), //Perhaps overly defensive
elem.Div(attrs.Props{ elem.Div(attrs.Props{
"hx-get": "browse/job/progress/" + uid, "hx-get": "/browse/job/progress/" + uid,
"hx-trigger": "every 600ms", "hx-trigger": "every 600ms",
"hx-target": "this", "hx-target": "this",
"hx-swap": "innerHTML", "hx-swap": "innerHTML",

View File

@@ -6,7 +6,6 @@ import (
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/explorer" "github.com/mudler/LocalAI/core/explorer"
"github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/internal" "github.com/mudler/LocalAI/internal"
) )
@@ -15,7 +14,6 @@ func Dashboard() func(*fiber.Ctx) error {
summary := fiber.Map{ summary := fiber.Map{
"Title": "LocalAI API - " + internal.PrintableVersion(), "Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(), "Version": internal.PrintableVersion(),
"BaseURL": utils.BaseURL(c),
} }
if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {

View File

@@ -9,7 +9,6 @@ import (
"github.com/google/uuid" "github.com/google/uuid"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/core/services"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
@@ -83,8 +82,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
Galleries: mgs.galleries, Galleries: mgs.galleries,
ConfigURL: input.ConfigURL, ConfigURL: input.ConfigURL,
} }
return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())})
} }
} }
@@ -107,7 +105,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
return err return err
} }
return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())}) return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
} }
} }

View File

@@ -4,7 +4,6 @@ import (
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal" "github.com/mudler/LocalAI/internal"
@@ -33,7 +32,6 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
summary := fiber.Map{ summary := fiber.Map{
"Title": "LocalAI API - " + internal.PrintableVersion(), "Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(), "Version": internal.PrintableVersion(),
"BaseURL": utils.BaseURL(c),
"Models": modelsWithoutConfig, "Models": modelsWithoutConfig,
"ModelsConfig": backendConfigs, "ModelsConfig": backendConfigs,
"GalleryConfig": galleryConfigs, "GalleryConfig": galleryConfigs,

View File

File diff suppressed because it is too large Load Diff

View File

@@ -1,186 +0,0 @@
package openai
import (
"context"
"fmt"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
grpcClient "github.com/mudler/LocalAI/pkg/grpc"
"github.com/mudler/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
"google.golang.org/grpc"
)
// Compile-time checks that both realtime model flavours satisfy Model.
// Typed nil pointers are enough for the check; no value is needed.
var (
	_ Model = (*wrappedModel)(nil)
	_ Model = (*anyToAnyModel)(nil)
)
// wrappedModel stands in for a model that has no native Any-to-Any support.
// It fakes an Any-to-Any model by overriding the gRPC client methods meant
// for such models; the intent is to route them through a pipeline of
// separate models instead (e.g. STT -> LLM -> TTS).
//
// Each pipeline stage keeps its backend config next to the client that was
// loaded from it.
type wrappedModel struct {
	// Voice activity detection stage.
	VADConfig *config.BackendConfig
	VADClient grpcClient.Backend

	// Speech-to-text (transcription) stage.
	TranscriptionConfig *config.BackendConfig
	TranscriptionClient grpcClient.Backend

	// Text generation stage.
	LLMConfig *config.BackendConfig
	LLMClient grpcClient.Backend

	// Text-to-speech stage.
	TTSConfig *config.BackendConfig
	TTSClient grpcClient.Backend
}
// anyToAnyModel represents a model with native Any-to-Any support.
// It is still wrapped because two models are loaded for it: one for VAD and
// one for the actual model. In the future there could be models that accept
// continuous audio input only, so this design will be useful for that.
type anyToAnyModel struct {
	// Backend configs for the any-to-any model itself and for the VAD model.
	LLMConfig, VADConfig *config.BackendConfig
	// Clients loaded from the configs above.
	LLMClient, VADClient grpcClient.Backend
}
// VAD runs voice activity detection by delegating to the dedicated VAD client.
// Caller-supplied gRPC call options are forwarded instead of being dropped.
func (m *wrappedModel) VAD(ctx context.Context, in *proto.VADRequest, opts ...grpc.CallOption) (*proto.VADResponse, error) {
	return m.VADClient.VAD(ctx, in, opts...)
}
// VAD runs voice activity detection by delegating to the dedicated VAD client.
// Caller-supplied gRPC call options are forwarded instead of being dropped.
func (m *anyToAnyModel) VAD(ctx context.Context, in *proto.VADRequest, opts ...grpc.CallOption) (*proto.VADResponse, error) {
	return m.VADClient.VAD(ctx, in, opts...)
}
// Predict currently sends the request straight to the LLM client; the
// transcription and TTS stages are not involved yet (see TODO).
// Caller-supplied gRPC call options are forwarded instead of being dropped.
func (m *wrappedModel) Predict(ctx context.Context, in *proto.PredictOptions, opts ...grpc.CallOption) (*proto.Reply, error) {
	// TODO: Convert with pipeline (audio to text, text to llm, result to tts, and return it)
	// sound.BufferAsWAV(audioData, "audio.wav")

	return m.LLMClient.Predict(ctx, in, opts...)
}
// PredictStream currently streams straight from the LLM client, invoking f
// for each reply; the transcription and TTS stages are not involved yet (see TODO).
// Caller-supplied gRPC call options are forwarded instead of being dropped.
func (m *wrappedModel) PredictStream(ctx context.Context, in *proto.PredictOptions, f func(reply *proto.Reply), opts ...grpc.CallOption) error {
	// TODO: Convert with pipeline (audio to text, text to llm, result to tts, and return it)

	return m.LLMClient.PredictStream(ctx, in, f, opts...)
}
// Predict delegates to the any-to-any model's client.
// Caller-supplied gRPC call options are forwarded instead of being dropped.
func (m *anyToAnyModel) Predict(ctx context.Context, in *proto.PredictOptions, opts ...grpc.CallOption) (*proto.Reply, error) {
	return m.LLMClient.Predict(ctx, in, opts...)
}
// PredictStream delegates to the any-to-any model's client, invoking f for each reply.
// Caller-supplied gRPC call options are forwarded instead of being dropped.
func (m *anyToAnyModel) PredictStream(ctx context.Context, in *proto.PredictOptions, f func(reply *proto.Reply), opts ...grpc.CallOption) error {
	return m.LLMClient.PredictStream(ctx, in, f, opts...)
}
// newModel loads the backends needed by cfg and returns them wrapped as a Model.
//
// The VAD model named by cfg.Pipeline.VAD is always loaded first. Then:
//   - if cfg.Pipeline.IsNotConfigured() reports true, cfg.Model is loaded as a
//     single any-to-any model and an *anyToAnyModel is returned;
//   - otherwise the LLM, TTS and transcription models named in cfg.Pipeline
//     are loaded and a *wrappedModel is returned.
//
// In the pipeline case all three configs are loaded and validated before any
// of the three backends is loaded, so a broken config fails before the first
// pipeline model load is attempted.
//
// modelName is not used by this function; it is kept so existing callers
// keep compiling.
//
// An error is returned as soon as any config fails to load or validate, or
// any backend fails to load; the message names the role that failed.
func newModel(cfg *config.BackendConfig, cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, modelName string) (Model, error) {
	// Prepare VAD model
	cfgVAD, err := loadRealtimeConfig(cl, ml, "VAD", cfg.Pipeline.VAD)
	if err != nil {
		return nil, err
	}
	VADClient, err := loadRealtimeBackend(ml, appConfig, "VAD", cfgVAD)
	if err != nil {
		return nil, err
	}

	// If we don't have wrapped model definitions, just return a standard
	// (any-to-any) model.
	if cfg.Pipeline.IsNotConfigured() {
		cfgAnyToAny, err := loadRealtimeConfig(cl, ml, "any-to-any", cfg.Model)
		if err != nil {
			return nil, err
		}
		anyToAnyClient, err := loadRealtimeBackend(ml, appConfig, "any-to-any", cfgAnyToAny)
		if err != nil {
			return nil, err
		}

		return &anyToAnyModel{
			LLMConfig: cfgAnyToAny,
			LLMClient: anyToAnyClient,
			VADConfig: cfgVAD,
			VADClient: VADClient,
		}, nil
	}

	log.Debug().Msg("Loading a wrapped model")

	// Otherwise we want to return a wrapped model, which is a "virtual" model
	// that re-uses other models to perform operations.
	cfgLLM, err := loadRealtimeConfig(cl, ml, "LLM", cfg.Pipeline.LLM)
	if err != nil {
		return nil, err
	}
	cfgTTS, err := loadRealtimeConfig(cl, ml, "TTS", cfg.Pipeline.TTS)
	if err != nil {
		return nil, err
	}
	cfgSST, err := loadRealtimeConfig(cl, ml, "transcription", cfg.Pipeline.Transcription)
	if err != nil {
		return nil, err
	}

	// Backends are loaded in the same order as before: TTS, transcription, LLM.
	ttsClient, err := loadRealtimeBackend(ml, appConfig, "TTS", cfgTTS)
	if err != nil {
		return nil, err
	}
	transcriptionClient, err := loadRealtimeBackend(ml, appConfig, "transcription", cfgSST)
	if err != nil {
		return nil, err
	}
	llmClient, err := loadRealtimeBackend(ml, appConfig, "LLM", cfgLLM)
	if err != nil {
		return nil, err
	}

	return &wrappedModel{
		TTSConfig:           cfgTTS,
		TranscriptionConfig: cfgSST,
		LLMConfig:           cfgLLM,
		TTSClient:           ttsClient,
		TranscriptionClient: transcriptionClient,
		LLMClient:           llmClient,
		VADConfig:           cfgVAD,
		VADClient:           VADClient,
	}, nil
}

// loadRealtimeConfig reads the backend config called name from ml.ModelPath
// and rejects it when it does not validate. role only labels error messages.
func loadRealtimeConfig(cl *config.BackendConfigLoader, ml *model.ModelLoader, role, name string) (*config.BackendConfig, error) {
	loaded, err := cl.LoadBackendConfigFileByName(name, ml.ModelPath)
	if err != nil {
		return nil, fmt.Errorf("failed to load %s backend config: %w", role, err)
	}
	if !loaded.Validate() {
		// Validate only yields a boolean, so there is no underlying error to
		// wrap here; wrapping the (nil) load error would print "%!w(<nil>)".
		return nil, fmt.Errorf("failed to validate %s backend config %q", role, name)
	}
	return loaded, nil
}

// loadRealtimeBackend loads the backend described by cfg through the model
// loader. role only labels error messages.
func loadRealtimeBackend(ml *model.ModelLoader, appConfig *config.ApplicationConfig, role string, cfg *config.BackendConfig) (grpcClient.Backend, error) {
	client, err := ml.Load(backend.ModelOptions(*cfg, appConfig)...)
	if err != nil {
		return nil, fmt.Errorf("failed to load %s model: %w", role, err)
	}
	return client, nil
}

View File

@@ -48,25 +48,6 @@ func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLo
return modelFile, input, err return modelFile, input, err
} }
// func readWSRequest(c *websocket.Conn, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
// input := new(schema.OpenAIRequest)
// input.Model = c.Query("name")
// received, _ := json.Marshal(input)
// ctx, cancel := context.WithCancel(o.Context)
// input.Context = ctx
// input.Cancel = cancel
// log.Debug().Msgf("Request received: %s", string(received))
// modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, firstModel)
// return modelFile, input, err
// }
func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) { func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) {
if input.Echo { if input.Echo {
config.Echo = input.Echo config.Echo = input.Echo

View File

@@ -7,7 +7,6 @@ import (
"github.com/gofiber/fiber/v2/middleware/favicon" "github.com/gofiber/fiber/v2/middleware/favicon"
"github.com/gofiber/fiber/v2/middleware/filesystem" "github.com/gofiber/fiber/v2/middleware/filesystem"
"github.com/mudler/LocalAI/core/explorer" "github.com/mudler/LocalAI/core/explorer"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/http/routes" "github.com/mudler/LocalAI/core/http/routes"
) )
@@ -23,7 +22,6 @@ func Explorer(db *explorer.Database) *fiber.App {
app := fiber.New(fiberCfg) app := fiber.New(fiberCfg)
app.Use(middleware.StripPathPrefix())
routes.RegisterExplorerRoutes(app, db) routes.RegisterExplorerRoutes(app, db)
httpFS := http.FS(embedDirStatic) httpFS := http.FS(embedDirStatic)

View File

@@ -8,7 +8,6 @@ import (
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/keyauth" "github.com/gofiber/fiber/v2/middleware/keyauth"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/utils"
) )
// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware // This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
@@ -40,9 +39,7 @@ func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.Er
if applicationConfig.OpaqueErrors { if applicationConfig.OpaqueErrors {
return ctx.SendStatus(401) return ctx.SendStatus(401)
} }
return ctx.Status(401).Render("views/login", fiber.Map{ return ctx.Status(401).Render("views/login", nil)
"BaseURL": utils.BaseURL(ctx),
})
} }
if applicationConfig.OpaqueErrors { if applicationConfig.OpaqueErrors {
return ctx.SendStatus(500) return ctx.SendStatus(500)

View File

@@ -1,36 +0,0 @@
package middleware
import (
"strings"
"github.com/gofiber/fiber/v2"
)
// StripPathPrefix returns a middleware that strips a path prefix from the request path.
// The path prefix is obtained from the X-Forwarded-Prefix HTTP request header.
//
// Header values are tried in order and empty values are ignored. For each
// value, with or without a trailing slash:
//   - if the request path starts with "<prefix>/", the prefix is removed from
//     the path (the leading slash of the remainder is kept) and no further
//     header values are considered;
//   - if the request path equals the prefix without its trailing slash, the
//     client is redirected to "<prefix>/" and the handler chain stops.
//
// When no value matches, the path is left untouched.
func StripPathPrefix() fiber.Handler {
	return func(c *fiber.Ctx) error {
		path := c.Path()

		for _, prefix := range c.GetReqHeaders()["X-Forwarded-Prefix"] {
			if prefix == "" {
				continue
			}

			// Normalise to both spellings: "/p" and "/p/". Only a single
			// trailing slash is removed, matching the header as sent.
			bare := strings.TrimSuffix(prefix, "/")
			withSlash := bare + "/"

			if strings.HasPrefix(path, withSlash) {
				// Cut at len(bare) so the remainder still starts with "/".
				c.Path(path[len(bare):])
				break
			}

			if path == bare {
				// Propagate a failure to write the redirect instead of
				// discarding it.
				return c.Redirect(withSlash)
			}
		}

		return c.Next()
	}
}

View File

@@ -1,121 +0,0 @@
package middleware
import (
"net/http/httptest"
"testing"
"github.com/gofiber/fiber/v2"
"github.com/stretchr/testify/require"
)
// TestStripPathPrefix drives requests through an app that has the
// StripPathPrefix middleware installed and checks, per case, the response
// status plus either the path seen by the route handler (200) or the
// redirect target (302).
func TestStripPathPrefix(t *testing.T) {
	// seenPath holds the path observed by whichever route handled the request.
	var seenPath string
	recordPath := func(c *fiber.Ctx) error {
		seenPath = c.Path()
		return nil
	}

	app := fiber.New()
	app.Use(StripPathPrefix())
	app.Get("/hello/world", recordPath)
	app.Get("/", recordPath)

	type testCase struct {
		name         string
		path         string
		prefixHeader []string
		expectStatus int
		expectPath   string
	}

	cases := []testCase{
		{
			name:         "without prefix and header",
			path:         "/hello/world",
			expectStatus: 200,
			expectPath:   "/hello/world",
		},
		{
			name:         "without prefix and headers on root path",
			path:         "/",
			expectStatus: 200,
			expectPath:   "/",
		},
		{
			name:         "without prefix but header",
			path:         "/hello/world",
			prefixHeader: []string{"/otherprefix/"},
			expectStatus: 200,
			expectPath:   "/hello/world",
		},
		{
			name:         "with prefix but non-matching header",
			path:         "/prefix/hello/world",
			prefixHeader: []string{"/otherprefix/"},
			expectStatus: 404,
		},
		{
			name:         "with prefix and matching header",
			path:         "/myprefix/hello/world",
			prefixHeader: []string{"/myprefix/"},
			expectStatus: 200,
			expectPath:   "/hello/world",
		},
		{
			name:         "with prefix and 1st header matching",
			path:         "/myprefix/hello/world",
			prefixHeader: []string{"/myprefix/", "/otherprefix/"},
			expectStatus: 200,
			expectPath:   "/hello/world",
		},
		{
			name:         "with prefix and 2nd header matching",
			path:         "/myprefix/hello/world",
			prefixHeader: []string{"/otherprefix/", "/myprefix/"},
			expectStatus: 200,
			expectPath:   "/hello/world",
		},
		{
			name:         "with prefix and header not ending with slash",
			path:         "/myprefix/hello/world",
			prefixHeader: []string{"/myprefix"},
			expectStatus: 200,
			expectPath:   "/hello/world",
		},
		{
			name:         "with prefix and non-matching header not ending with slash",
			path:         "/myprefix-suffix/hello/world",
			prefixHeader: []string{"/myprefix"},
			expectStatus: 404,
		},
		{
			name:         "redirect when prefix does not end with a slash",
			path:         "/myprefix",
			prefixHeader: []string{"/myprefix"},
			expectStatus: 302,
			expectPath:   "/myprefix/",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Reset so a handler that never runs cannot leak a previous value.
			seenPath = ""

			req := httptest.NewRequest("GET", tc.path, nil)
			if tc.prefixHeader != nil {
				req.Header["X-Forwarded-Prefix"] = tc.prefixHeader
			}

			resp, err := app.Test(req, -1)
			require.NoError(t, err)
			require.Equal(t, tc.expectStatus, resp.StatusCode, "response status code")

			switch tc.expectStatus {
			case 200:
				require.Equal(t, tc.expectPath, seenPath, "rewritten path")
			case 302:
				require.Equal(t, tc.expectPath, resp.Header.Get("Location"), "redirect location")
			}
		})
	}
}

View File

@@ -10,7 +10,6 @@ import (
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
fiberhtml "github.com/gofiber/template/html/v2" fiberhtml "github.com/gofiber/template/html/v2"
"github.com/microcosm-cc/bluemonday" "github.com/microcosm-cc/bluemonday"
"github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/schema"
"github.com/russross/blackfriday" "github.com/russross/blackfriday"
) )
@@ -27,9 +26,7 @@ func notFoundHandler(c *fiber.Ctx) error {
}) })
} else { } else {
// The client expects an HTML response // The client expects an HTML response
return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{ return c.Status(fiber.StatusNotFound).Render("views/404", fiber.Map{})
"BaseURL": utils.BaseURL(c),
})
} }
} }

View File

@@ -11,9 +11,6 @@ func RegisterOpenAIRoutes(app *fiber.App,
application *application.Application) { application *application.Application) {
// openAI compatible API endpoint // openAI compatible API endpoint
// realtime
app.Get("/v1/realtime", openai.Realtime(application))
// chat // chat
app.Post("/v1/chat/completions", app.Post("/v1/chat/completions",
openai.ChatEndpoint( openai.ChatEndpoint(

View File

@@ -6,21 +6,20 @@ import (
"sort" "sort"
"strings" "strings"
"github.com/microcosm-cc/bluemonday"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/elements" "github.com/mudler/LocalAI/core/http/elements"
"github.com/mudler/LocalAI/core/http/endpoints/localai" "github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal" "github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/xsync" "github.com/mudler/LocalAI/pkg/xsync"
"github.com/rs/zerolog/log"
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/microcosm-cc/bluemonday"
"github.com/rs/zerolog/log"
) )
type modelOpCache struct { type modelOpCache struct {
@@ -92,7 +91,6 @@ func RegisterUIRoutes(app *fiber.App,
app.Get("/p2p", func(c *fiber.Ctx) error { app.Get("/p2p", func(c *fiber.Ctx) error {
summary := fiber.Map{ summary := fiber.Map{
"Title": "LocalAI - P2P dashboard", "Title": "LocalAI - P2P dashboard",
"BaseURL": utils.BaseURL(c),
"Version": internal.PrintableVersion(), "Version": internal.PrintableVersion(),
//"Nodes": p2p.GetAvailableNodes(""), //"Nodes": p2p.GetAvailableNodes(""),
//"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID), //"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
@@ -151,7 +149,6 @@ func RegisterUIRoutes(app *fiber.App,
summary := fiber.Map{ summary := fiber.Map{
"Title": "LocalAI - Models", "Title": "LocalAI - Models",
"BaseURL": utils.BaseURL(c),
"Version": internal.PrintableVersion(), "Version": internal.PrintableVersion(),
"Models": template.HTML(elements.ListModels(models, processingModels, galleryService)), "Models": template.HTML(elements.ListModels(models, processingModels, galleryService)),
"Repositories": appConfig.Galleries, "Repositories": appConfig.Galleries,
@@ -311,7 +308,6 @@ func RegisterUIRoutes(app *fiber.App,
summary := fiber.Map{ summary := fiber.Map{
"Title": "LocalAI - Chat with " + c.Params("model"), "Title": "LocalAI - Chat with " + c.Params("model"),
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs, "ModelsConfig": backendConfigs,
"Model": c.Params("model"), "Model": c.Params("model"),
"Version": internal.PrintableVersion(), "Version": internal.PrintableVersion(),
@@ -327,12 +323,11 @@ func RegisterUIRoutes(app *fiber.App,
if len(backendConfigs) == 0 { if len(backendConfigs) == 0 {
// If no model is available redirect to the index which suggests how to install models // If no model is available redirect to the index which suggests how to install models
return c.Redirect(utils.BaseURL(c)) return c.Redirect("/")
} }
summary := fiber.Map{ summary := fiber.Map{
"Title": "LocalAI - Talk", "Title": "LocalAI - Talk",
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs, "ModelsConfig": backendConfigs,
"Model": backendConfigs[0], "Model": backendConfigs[0],
"IsP2PEnabled": p2p.IsP2PEnabled(), "IsP2PEnabled": p2p.IsP2PEnabled(),
@@ -349,12 +344,11 @@ func RegisterUIRoutes(app *fiber.App,
if len(backendConfigs) == 0 { if len(backendConfigs) == 0 {
// If no model is available redirect to the index which suggests how to install models // If no model is available redirect to the index which suggests how to install models
return c.Redirect(utils.BaseURL(c)) return c.Redirect("/")
} }
summary := fiber.Map{ summary := fiber.Map{
"Title": "LocalAI - Chat with " + backendConfigs[0], "Title": "LocalAI - Chat with " + backendConfigs[0],
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs, "ModelsConfig": backendConfigs,
"Model": backendConfigs[0], "Model": backendConfigs[0],
"Version": internal.PrintableVersion(), "Version": internal.PrintableVersion(),
@@ -370,7 +364,6 @@ func RegisterUIRoutes(app *fiber.App,
summary := fiber.Map{ summary := fiber.Map{
"Title": "LocalAI - Generate images with " + c.Params("model"), "Title": "LocalAI - Generate images with " + c.Params("model"),
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs, "ModelsConfig": backendConfigs,
"Model": c.Params("model"), "Model": c.Params("model"),
"Version": internal.PrintableVersion(), "Version": internal.PrintableVersion(),
@@ -387,12 +380,11 @@ func RegisterUIRoutes(app *fiber.App,
if len(backendConfigs) == 0 { if len(backendConfigs) == 0 {
// If no model is available redirect to the index which suggests how to install models // If no model is available redirect to the index which suggests how to install models
return c.Redirect(utils.BaseURL(c)) return c.Redirect("/")
} }
summary := fiber.Map{ summary := fiber.Map{
"Title": "LocalAI - Generate images with " + backendConfigs[0].Name, "Title": "LocalAI - Generate images with " + backendConfigs[0].Name,
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs, "ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name, "Model": backendConfigs[0].Name,
"Version": internal.PrintableVersion(), "Version": internal.PrintableVersion(),
@@ -408,7 +400,6 @@ func RegisterUIRoutes(app *fiber.App,
summary := fiber.Map{ summary := fiber.Map{
"Title": "LocalAI - Generate images with " + c.Params("model"), "Title": "LocalAI - Generate images with " + c.Params("model"),
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs, "ModelsConfig": backendConfigs,
"Model": c.Params("model"), "Model": c.Params("model"),
"Version": internal.PrintableVersion(), "Version": internal.PrintableVersion(),
@@ -425,12 +416,11 @@ func RegisterUIRoutes(app *fiber.App,
if len(backendConfigs) == 0 { if len(backendConfigs) == 0 {
// If no model is available redirect to the index which suggests how to install models // If no model is available redirect to the index which suggests how to install models
return c.Redirect(utils.BaseURL(c)) return c.Redirect("/")
} }
summary := fiber.Map{ summary := fiber.Map{
"Title": "LocalAI - Generate audio with " + backendConfigs[0].Name, "Title": "LocalAI - Generate audio with " + backendConfigs[0].Name,
"BaseURL": utils.BaseURL(c),
"ModelsConfig": backendConfigs, "ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name, "Model": backendConfigs[0].Name,
"IsP2PEnabled": p2p.IsP2PEnabled(), "IsP2PEnabled": p2p.IsP2PEnabled(),

View File

@@ -7,33 +7,33 @@ https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wg
font-style: normal; font-style: normal;
font-weight: 400; font-weight: 400;
font-display: swap; font-display: swap;
src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuLyfMZg.ttf) format('truetype'); src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuLyfMZg.ttf) format('truetype');
} }
@font-face { @font-face {
font-family: 'Inter'; font-family: 'Inter';
font-style: normal; font-style: normal;
font-weight: 600; font-weight: 600;
font-display: swap; font-display: swap;
src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuGKYMZg.ttf) format('truetype'); src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuGKYMZg.ttf) format('truetype');
} }
@font-face { @font-face {
font-family: 'Inter'; font-family: 'Inter';
font-style: normal; font-style: normal;
font-weight: 700; font-weight: 700;
font-display: swap; font-display: swap;
src: url(./UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuFuYMZg.ttf) format('truetype'); src: url(/static/assets/UcCO3FwrK3iLTeHuS_fvQtMwCp50KnMw2boKoduKmMEVuFuYMZg.ttf) format('truetype');
} }
@font-face { @font-face {
font-family: 'Roboto'; font-family: 'Roboto';
font-style: normal; font-style: normal;
font-weight: 400; font-weight: 400;
font-display: swap; font-display: swap;
src: url(./KFOmCnqEu92Fr1Me5Q.ttf) format('truetype'); src: url(/static/assets/KFOmCnqEu92Fr1Me5Q.ttf) format('truetype');
} }
@font-face { @font-face {
font-family: 'Roboto'; font-family: 'Roboto';
font-style: normal; font-style: normal;
font-weight: 500; font-weight: 500;
font-display: swap; font-display: swap;
src: url(./KFOlCnqEu92Fr1MmEU9vAw.ttf) format('truetype'); src: url(/static/assets/KFOlCnqEu92Fr1MmEU9vAw.ttf) format('truetype');
} }

View File

@@ -7,33 +7,33 @@ https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap
font-style: normal; font-style: normal;
font-weight: 300; font-weight: 300;
font-display: swap; font-display: swap;
src: url(./KFOlCnqEu92Fr1MmSU5fBBc9.ttf) format('truetype'); src: url(/static/assets//KFOlCnqEu92Fr1MmSU5fBBc9.ttf) format('truetype');
} }
@font-face { @font-face {
font-family: 'Roboto'; font-family: 'Roboto';
font-style: normal; font-style: normal;
font-weight: 400; font-weight: 400;
font-display: swap; font-display: swap;
src: url(./KFOmCnqEu92Fr1Mu4mxP.ttf) format('truetype'); src: url(/static/assets//KFOmCnqEu92Fr1Mu4mxP.ttf) format('truetype');
} }
@font-face { @font-face {
font-family: 'Roboto'; font-family: 'Roboto';
font-style: normal; font-style: normal;
font-weight: 500; font-weight: 500;
font-display: swap; font-display: swap;
src: url(./KFOlCnqEu92Fr1MmEU9fBBc9.ttf) format('truetype'); src: url(/static/assets//KFOlCnqEu92Fr1MmEU9fBBc9.ttf) format('truetype');
} }
@font-face { @font-face {
font-family: 'Roboto'; font-family: 'Roboto';
font-style: normal; font-style: normal;
font-weight: 700; font-weight: 700;
font-display: swap; font-display: swap;
src: url(./KFOlCnqEu92Fr1MmWUlfBBc9.ttf) format('truetype'); src: url(/static/assets//KFOlCnqEu92Fr1MmWUlfBBc9.ttf) format('truetype');
} }
@font-face { @font-face {
font-family: 'Roboto'; font-family: 'Roboto';
font-style: normal; font-style: normal;
font-weight: 900; font-weight: 900;
font-display: swap; font-display: swap;
src: url(./KFOlCnqEu92Fr1MmYUtfBBc9.ttf) format('truetype'); src: url(/static/assets//KFOlCnqEu92Fr1MmYUtfBBc9.ttf) format('truetype');
} }

View File

@@ -143,7 +143,7 @@ function readInputImage() {
// } // }
// Source: https://stackoverflow.com/a/75751803/11386095 // Source: https://stackoverflow.com/a/75751803/11386095
const response = await fetch("v1/chat/completions", { const response = await fetch("/v1/chat/completions", {
method: "POST", method: "POST",
headers: { headers: {
Authorization: `Bearer ${key}`, Authorization: `Bearer ${key}`,

View File

@@ -48,7 +48,7 @@ async function promptDallE(key, input) {
document.getElementById("input").disabled = true; document.getElementById("input").disabled = true;
const model = document.getElementById("image-model").value; const model = document.getElementById("image-model").value;
const response = await fetch("v1/images/generations", { const response = await fetch("/v1/images/generations", {
method: "POST", method: "POST",
headers: { headers: {
Authorization: `Bearer ${key}`, Authorization: `Bearer ${key}`,

View File

@@ -122,7 +122,7 @@ async function sendAudioToWhisper(audioBlob) {
formData.append('model', getWhisperModel()); formData.append('model', getWhisperModel());
API_KEY = localStorage.getItem("key"); API_KEY = localStorage.getItem("key");
const response = await fetch('v1/audio/transcriptions', { const response = await fetch('/v1/audio/transcriptions', {
method: 'POST', method: 'POST',
headers: { headers: {
'Authorization': `Bearer ${API_KEY}` 'Authorization': `Bearer ${API_KEY}`
@@ -139,7 +139,7 @@ async function sendTextToChatGPT(text) {
conversationHistory.push({ role: "user", content: text }); conversationHistory.push({ role: "user", content: text });
API_KEY = localStorage.getItem("key"); API_KEY = localStorage.getItem("key");
const response = await fetch('v1/chat/completions', { const response = await fetch('/v1/chat/completions', {
method: 'POST', method: 'POST',
headers: { headers: {
'Authorization': `Bearer ${API_KEY}`, 'Authorization': `Bearer ${API_KEY}`,
@@ -163,7 +163,7 @@ async function sendTextToChatGPT(text) {
async function getTextToSpeechAudio(text) { async function getTextToSpeechAudio(text) {
API_KEY = localStorage.getItem("key"); API_KEY = localStorage.getItem("key");
const response = await fetch('v1/audio/speech', { const response = await fetch('/v1/audio/speech', {
method: 'POST', method: 'POST',
headers: { headers: {

View File

@@ -19,7 +19,7 @@ async function tts(key, input) {
document.getElementById("input").disabled = true; document.getElementById("input").disabled = true;
const model = document.getElementById("tts-model").value; const model = document.getElementById("tts-model").value;
const response = await fetch("tts", { const response = await fetch("/tts", {
method: "POST", method: "POST",
headers: { headers: {
Authorization: `Bearer ${key}`, Authorization: `Bearer ${key}`,

View File

@@ -1,24 +0,0 @@
package utils
import (
"strings"
"github.com/gofiber/fiber/v2"
)
// BaseURL returns the base URL for the given HTTP request context.
// It takes into account that the app may be exposed by a reverse-proxy under a different protocol, host and path.
// The returned URL is guaranteed to end with `/`.
// The method should be used in conjunction with the StripPathPrefix middleware.
//
// The path prefix is recovered by comparing the current (possibly rewritten)
// path with the path of the original request: when the original path ends
// with the current one, whatever precedes it, up to and including the slash,
// is the prefix.
func BaseURL(c *fiber.Ctx) string {
	path := c.Path()
	origPath := c.OriginalURL()

	// OriginalURL carries the query string while Path does not. Drop it so
	// the suffix comparison still succeeds for requests such as
	// "/prefix/page?x=1"; otherwise the prefix would be lost for them.
	if q := strings.IndexByte(origPath, '?'); q >= 0 {
		origPath = origPath[:q]
	}

	if path != origPath && strings.HasSuffix(origPath, path) {
		// +1 keeps the leading slash of path as the prefix's trailing slash.
		pathPrefix := origPath[:len(origPath)-len(path)+1]

		return c.BaseURL() + pathPrefix
	}

	return c.BaseURL() + "/"
}

View File

@@ -1,48 +0,0 @@
package utils
import (
"net/http/httptest"
"testing"
"github.com/gofiber/fiber/v2"
"github.com/stretchr/testify/require"
)
// TestBaseURL checks the URL computed by BaseURL for a request served at the
// root and for one served under a path prefix. In the prefixed case the
// handler rewrites the path itself before calling BaseURL.
func TestBaseURL(t *testing.T) {
	type testCase struct {
		name      string
		prefix    string
		expectURL string
	}

	cases := []testCase{
		{
			name:      "without prefix",
			prefix:    "/",
			expectURL: "http://example.com/",
		},
		{
			name:      "with prefix",
			prefix:    "/myprefix/",
			expectURL: "http://example.com/myprefix/",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			route := tc.prefix + "hello/world"
			var gotURL string

			app := fiber.New()
			app.Get(route, func(c *fiber.Ctx) error {
				if tc.prefix != "/" {
					// Rewrite the path to the un-prefixed form before asking
					// for the base URL.
					c.Path("/hello/world")
				}
				gotURL = BaseURL(c)
				return nil
			})

			resp, err := app.Test(httptest.NewRequest("GET", route, nil), -1)

			require.NoError(t, err)
			require.Equal(t, 200, resp.StatusCode, "response status code")
			require.Equal(t, tc.expectURL, gotURL, "base URL")
		})
	}
}

View File

@@ -12,7 +12,7 @@
<div class="header text-center py-12"> <div class="header text-center py-12">
<h1 class="text-5xl font-bold">Welcome to your LocalAI instance!</h1> <h1 class="text-5xl font-bold">Welcome to your LocalAI instance!</h1>
<div class="mt-6"> <div class="mt-6">
<!-- <a href="./" aria-label="HomePage" alt="HomePage"> <!-- <a href="/" aria-label="HomePage" alt="HomePage">
<img class="mx-auto w-1/4 h-auto" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo"> <img class="mx-auto w-1/4 h-auto" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo">
</a> </a>
--> -->

View File

@@ -28,7 +28,7 @@ SOFTWARE.
<!doctype html> <!doctype html>
<html lang="en"> <html lang="en">
{{template "views/partials/head" .}} {{template "views/partials/head" .}}
<script defer src="static/chat.js"></script> <script defer src="/static/chat.js"></script>
<style> <style>
body { body {
overflow: hidden; overflow: hidden;
@@ -101,9 +101,9 @@ SOFTWARE.
{{ $model:=.Model}} {{ $model:=.Model}}
{{ range .ModelsConfig }} {{ range .ModelsConfig }}
{{ if eq . $model }} {{ if eq . $model }}
<option value="chat/{{.}}" selected class="bg-gray-700 text-white">{{.}}</option> <option value="/chat/{{.}}" selected class="bg-gray-700 text-white">{{.}}</option>
{{ else }} {{ else }}
<option value="chat/{{.}}" class="bg-gray-700 text-white">{{.}}</option> <option value="/chat/{{.}}" class="bg-gray-700 text-white">{{.}}</option>
{{ end }} {{ end }}
{{ end }} {{ end }}
</select> </select>
@@ -142,7 +142,7 @@ SOFTWARE.
<div id="loader" class="my-2 loader" style="display: none;"></div> <div id="loader" class="my-2 loader" style="display: none;"></div>
<input id="chat-model" type="hidden" value="{{.Model}}"> <input id="chat-model" type="hidden" value="{{.Model}}">
<input id="input_image" type="file" style="display: none;" @change="fileName = $event.target.files[0].name"> <input id="input_image" type="file" style="display: none;" @change="fileName = $event.target.files[0].name">
<form id="prompt" action="chat/{{.Model}}" method="get" @submit.prevent="submitPrompt"> <form id="prompt" action="/chat/{{.Model}}" method="get" @submit.prevent="submitPrompt">
<div class="relative w-full"> <div class="relative w-full">
<textarea <textarea
id="input" id="input"

View File

@@ -370,7 +370,7 @@
} }
} }
</script> </script>
<script src="static/p2panimation.js"></script> <script src="/static/p2panimation.js"></script>
{{template "views/partials/footer" .}} {{template "views/partials/footer" .}}
</div> </div>

View File

@@ -20,7 +20,7 @@
{{template "views/partials/inprogress" .}} {{template "views/partials/inprogress" .}}
{{ if eq (len .ModelsConfig) 0 }} {{ if eq (len .ModelsConfig) 0 }}
<h2 class="text-center text-3xl font-semibold text-gray-100"> <i class="text-yellow-200 ml-2 fa-solid fa-triangle-exclamation animate-pulse"></i> Ouch! seems you don't have any models installed from the LocalAI gallery!</h2> <h2 class="text-center text-3xl font-semibold text-gray-100"> <i class="text-yellow-200 ml-2 fa-solid fa-triangle-exclamation animate-pulse"></i> Ouch! seems you don't have any models installed from the LocalAI gallery!</h2>
<p class="text-center mt-4 text-xl">..install something from the <a class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded" href="browse">🖼️ Gallery</a> or check the <a href="https://localai.io/basics/getting_started/" class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded"> <i class="fa-solid fa-book"></i> Getting started documentation </a></p> <p class="text-center mt-4 text-xl">..install something from the <a class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded" href="/browse">🖼️ Gallery</a> or check the <a href="https://localai.io/basics/getting_started/" class="text-gray-400 hover:text-white ml-1 px-3 py-2 rounded"> <i class="fa-solid fa-book"></i> Getting started documentation </a></p>
{{ if ne (len .Models) 0 }} {{ if ne (len .Models) 0 }}
<hr class="my-4"> <hr class="my-4">
@@ -66,7 +66,7 @@
{{ end }} {{ end }}
</td> </td>
<td class="px-4 py-3 font-bold"> <td class="px-4 py-3 font-bold">
<p class="font-bold text-white flex items-center"><i class="fas fa-brain pr-2"></i><a href="browse?term={{.Name}}">{{.Name}}</a></p> <p class="font-bold text-white flex items-center"><i class="fas fa-brain pr-2"></i><a href="/browse?term={{.Name}}">{{.Name}}</a></p>
</td> </td>
<td class="px-4 py-3 font-bold"> <td class="px-4 py-3 font-bold">
{{ if .Backend }} {{ if .Backend }}
@@ -84,7 +84,7 @@
<td class="px-4 py-3"> <td class="px-4 py-3">
<button <button
class="float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong" class="float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
data-twe-ripple-color="light" data-twe-ripple-init="" hx-confirm="Are you sure you wish to delete the model?" hx-post="browse/delete/model/{{.Name}}" hx-swap="outerHTML"><i class="fa-solid fa-cancel pr-2"></i>Delete</button> data-twe-ripple-color="light" data-twe-ripple-init="" hx-confirm="Are you sure you wish to delete the model?" hx-post="/browse/delete/model/{{.Name}}" hx-swap="outerHTML"><i class="fa-solid fa-cancel pr-2"></i>Delete</button>
</td> </td>
{{ end }} {{ end }}
{{ range .Models }} {{ range .Models }}

View File

@@ -4,8 +4,6 @@
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Open Authenticated Website</title> <title>Open Authenticated Website</title>
<base href="{{.BaseURL}}" />
<link rel="icon" type="image/x-icon" href="favicon.ico" />
</head> </head>
<body> <body>
<h1>Authorization is required</h1> <h1>Authorization is required</h1>

View File

@@ -16,38 +16,38 @@
<div class="text-center font-semibold text-gray-100"> <div class="text-center font-semibold text-gray-100">
<h2>Filter by type:</h2> <h2>Filter by type:</h2>
<button hx-post="browse/search/models" <button hx-post="/browse/search/models"
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2" class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results" hx-target="#search-results"
hx-vals='{"search": "tts"}' hx-vals='{"search": "tts"}'
hx-indicator=".htmx-indicator" >TTS</button> hx-indicator=".htmx-indicator" >TTS</button>
<button hx-post="browse/search/models" <button hx-post="/browse/search/models"
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2" class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results" hx-target="#search-results"
hx-vals='{"search": "stablediffusion"}' hx-vals='{"search": "stablediffusion"}'
hx-indicator=".htmx-indicator" >Image generation</button> hx-indicator=".htmx-indicator" >Image generation</button>
<button hx-post="browse/search/models" \ <button hx-post="/browse/search/models" \
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2" class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results" hx-target="#search-results"
hx-vals='{"search": "llm"}' hx-vals='{"search": "llm"}'
hx-indicator=".htmx-indicator" >Text generation</button> hx-indicator=".htmx-indicator" >Text generation</button>
<button hx-post="browse/search/models" <button hx-post="/browse/search/models"
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2" class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results" hx-target="#search-results"
hx-vals='{"search": "multimodal"}' hx-vals='{"search": "multimodal"}'
hx-indicator=".htmx-indicator" >Multimodal</button> hx-indicator=".htmx-indicator" >Multimodal</button>
<button hx-post="browse/search/models" <button hx-post="/browse/search/models"
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2" class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results" hx-target="#search-results"
hx-vals='{"search": "embedding"}' hx-vals='{"search": "embedding"}'
hx-indicator=".htmx-indicator" >Embeddings</button> hx-indicator=".htmx-indicator" >Embeddings</button>
<button hx-post="browse/search/models" <button hx-post="/browse/search/models"
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2" class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results" hx-target="#search-results"
hx-vals='{"search": "rerank"}' hx-vals='{"search": "rerank"}'
hx-indicator=".htmx-indicator" >Rerankers</button> hx-indicator=".htmx-indicator" >Rerankers</button>
<button <button
hx-post="browse/search/models" hx-post="/browse/search/models"
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2" class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
hx-target="#search-results" hx-target="#search-results"
hx-vals='{"search": "whisper"}' hx-vals='{"search": "whisper"}'
@@ -57,7 +57,7 @@
<div class="text-center text-xs font-semibold text-gray-100"> <div class="text-center text-xs font-semibold text-gray-100">
Filter by tags: Filter by tags:
{{ range .AllTags }} {{ range .AllTags }}
<button hx-post="browse/search/models" class="text-blue-500" hx-target="#search-results" <button hx-post="/browse/search/models" class="text-blue-500" hx-target="#search-results"
hx-vals='{"search": "{{.}}"}' hx-vals='{"search": "{{.}}"}'
hx-indicator=".htmx-indicator" >{{.}}</button> hx-indicator=".htmx-indicator" >{{.}}</button>
{{ end }} {{ end }}
@@ -69,7 +69,7 @@
<input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search" <input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
name="search" placeholder="Begin Typing To Search models..." name="search" placeholder="Begin Typing To Search models..."
hx-post="browse/search/models" hx-post="/browse/search/models"
hx-trigger="input changed delay:500ms, search" hx-trigger="input changed delay:500ms, search"
hx-target="#search-results" hx-target="#search-results"
hx-indicator=".htmx-indicator"> hx-indicator=".htmx-indicator">

View File

@@ -48,11 +48,11 @@
<!-- Federation Box --> <!-- Federation Box -->
<div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left"> <div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
<p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Federated Nodes: <span hx-get="p2p/ui/workers-federation-stats" hx-trigger="every 1s"></span> </p> <p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Federated Nodes: <span hx-get="/p2p/ui/workers-federation-stats" hx-trigger="every 1s"></span> </p>
<p class="mb-4">You can start LocalAI in federated mode to share your instance, or start the federated server to balance requests between nodes of the federation.</p> <p class="mb-4">You can start LocalAI in federated mode to share your instance, or start the federated server to balance requests between nodes of the federation.</p>
<div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12"> <div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12">
<div hx-get="p2p/ui/workers-federation" hx-trigger="every 1s"></div> <div hx-get="/p2p/ui/workers-federation" hx-trigger="every 1s"></div>
</div> </div>
<hr class="border-gray-700 mb-12"> <hr class="border-gray-700 mb-12">
@@ -123,11 +123,11 @@
<div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left"> <div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
<p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Workers (llama.cpp): <span hx-get="p2p/ui/workers-stats" hx-trigger="every 1s"></span> </p> <p class="text-xl font-semibold text-gray-200"> <i class="text-gray-200 fa-solid fa-circle-nodes"></i> Workers (llama.cpp): <span hx-get="/p2p/ui/workers-stats" hx-trigger="every 1s"></span> </p>
<p class="mb-4">You can start llama.cpp workers to distribute weights between the workers and offload part of the computation. To start a new worker, you can use the CLI or Docker.</p> <p class="mb-4">You can start llama.cpp workers to distribute weights between the workers and offload part of the computation. To start a new worker, you can use the CLI or Docker.</p>
<div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12"> <div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4 mb-12">
<div hx-get="p2p/ui/workers" hx-trigger="every 1s"></div> <div hx-get="/p2p/ui/workers" hx-trigger="every 1s"></div>
</div> </div>
<hr class="border-gray-700 mb-12"> <hr class="border-gray-700 mb-12">
@@ -177,7 +177,7 @@
{{template "views/partials/footer" .}} {{template "views/partials/footer" .}}
</div> </div>
<script src="static/p2panimation.js"></script> <script src="/static/p2panimation.js"></script>
<style> <style>
.token { .token {
word-break: break-all; word-break: break-all;

View File

@@ -2,4 +2,4 @@
LocalAI Version {{.Version}}<br> LocalAI Version {{.Version}}<br>
<a href='https://github.com/mudler/LocalAI' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2024 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a> <a href='https://github.com/mudler/LocalAI' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2024 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
</footer> </footer>
<script src="static/assets/tw-elements.js"></script> <script src="/static/assets/tw-elements.js"></script>

View File

@@ -2,35 +2,33 @@
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{.Title}}</title> <title>{{.Title}}</title>
<base href="{{.BaseURL}}" />
<link rel="icon" type="image/x-icon" href="favicon.ico" />
<link <link
rel="stylesheet" rel="stylesheet"
href="static/assets/highlightjs.css" href="/static/assets/highlightjs.css"
/> />
<script defer src="static/assets/highlightjs.js"></script> <script defer src="/static/assets/highlightjs.js"></script>
<script <script
defer defer
src="static/assets/alpine.js" src="/static/assets/alpine.js"
></script> ></script>
<script <script
defer defer
src="static/assets/marked.js" src="/static/assets/marked.js"
></script> ></script>
<script <script
defer defer
src="static/assets/purify.js" src="/static/assets/purify.js"
></script> ></script>
<link href="static/general.css" rel="stylesheet" /> <link href="/static/general.css" rel="stylesheet" />
<link href="static/assets/font1.css" rel="stylesheet"> <link href="/static/assets/font1.css" rel="stylesheet">
<link <link
href="static/assets/font2.css" href="/static/assets/font2.css"
rel="stylesheet" /> rel="stylesheet" />
<link <link
rel="stylesheet" rel="stylesheet"
href="static/assets/tw-elements.css" /> href="/static/assets/tw-elements.css" />
<script src="static/assets/tailwindcss.js"></script> <script src="/static/assets/tailwindcss.js"></script>
<script> <script>
tailwind.config = { tailwind.config = {
darkMode: "class", darkMode: "class",
@@ -56,11 +54,11 @@
}); });
} }
</script> </script>
<link href="static/assets/fontawesome/css/fontawesome.css" rel="stylesheet" /> <link href="/static/assets/fontawesome/css/fontawesome.css" rel="stylesheet" />
<link href="static/assets/fontawesome/css/brands.css" rel="stylesheet" /> <link href="/static/assets/fontawesome/css/brands.css" rel="stylesheet" />
<link href="static/assets/fontawesome/css/solid.css" rel="stylesheet" /> <link href="/static/assets/fontawesome/css/solid.css" rel="stylesheet" />
<script src="static/assets/flowbite.min.js"></script> <script src="/static/assets/flowbite.min.js"></script>
<script src="static/assets/htmx.js" crossorigin="anonymous"></script> <script src="/static/assets/htmx.js" crossorigin="anonymous"></script>
<!-- P2P Animation START --> <!-- P2P Animation START -->
<style> <style>
.animation-container { .animation-container {

View File

@@ -17,13 +17,13 @@
<div class="flex items-center justify-between bg-slate-600 p-2 mb-2 rounded-md"> <div class="flex items-center justify-between bg-slate-600 p-2 mb-2 rounded-md">
<div class="flex items center"> <div class="flex items center">
<span class="text-gray-300"><a href="browse?term={{$parts._1}}" <span class="text-gray-300"><a href="/browse?term={{$parts._1}}"
class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2" class="text-white-500 inline-block bg-blue-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2"
>{{$modelName}}</a> {{if $repository}} (from the '{{$repository}}' repository) {{end}}</span> >{{$modelName}}</a> {{if $repository}} (from the '{{$repository}}' repository) {{end}}</span>
</div> </div>
<div hx-get="browse/job/{{$value}}" hx-swap="outerHTML" hx-target="this" hx-trigger="done"> <div hx-get="/browse/job/{{$value}}" hx-swap="outerHTML" hx-target="this" hx-trigger="done">
<h3 role="status" id="pblabel" >{{$op}} <h3 role="status" id="pblabel" >{{$op}}
<div hx-get="browse/job/progress/{{$value}}" hx-trigger="every 600ms" <div hx-get="/browse/job/progress/{{$value}}" hx-trigger="every 600ms"
hx-target="this" hx-target="this"
hx-swap="innerHTML" ></div></h3> hx-swap="innerHTML" ></div></h3>
</div> </div>

View File

@@ -3,8 +3,8 @@
<div class="flex items-center justify-between"> <div class="flex items-center justify-between">
<div class="flex items-center"> <div class="flex items-center">
<!-- Logo Image: Replace 'logo_url_here' with your actual logo URL --> <!-- Logo Image: Replace 'logo_url_here' with your actual logo URL -->
<a href="./" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a> <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
<a href="./" class="text-white text-xl font-bold">LocalAI</a> <a href="/" class="text-white text-xl font-bold">LocalAI</a>
</div> </div>
<!-- Menu button for small screens --> <!-- Menu button for small screens -->
<div class="lg:hidden"> <div class="lg:hidden">
@@ -14,33 +14,33 @@
</div> </div>
<!-- Navigation links --> <!-- Navigation links -->
<div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0"> <div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
<a href="./" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a> <a href="/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
<a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a> <a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
<a href="browse/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a> <a href="/browse/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
<a href="chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a> <a href="/chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
<a href="text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a> <a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
<a href="tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a> <a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
<a href="talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a> <a href="/talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
{{ if .IsP2PEnabled }} {{ if .IsP2PEnabled }}
<a href="p2p/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-circle-nodes"></i> Swarm </a> <a href="/p2p/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
{{ end }} {{ end }}
<a href="swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a> <a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
</div> </div>
</div> </div>
<!-- Collapsible menu for small screens --> <!-- Collapsible menu for small screens -->
<div class="hidden lg:hidden" id="mobile-menu"> <div class="hidden lg:hidden" id="mobile-menu">
<div class="pt-4 pb-3 border-t border-gray-700"> <div class="pt-4 pb-3 border-t border-gray-700">
<a href="./" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-home pr-2"></i>Home</a> <a href="/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-home pr-2"></i>Home</a>
<a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a> <a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
<a href="browse/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-brain pr-2"></i> Models</a> <a href="/browse/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-brain pr-2"></i> Models</a>
<a href="chat/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-comments pr-2"></i> Chat</a> <a href="/chat/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
<a href="text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a> <a href="/text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
<a href="tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a> <a href="/tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
<a href="talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a> <a href="/talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
{{ if .IsP2PEnabled }} {{ if .IsP2PEnabled }}
<a href="p2p/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-circle-nodes"></i> Swarm </a> <a href="/p2p/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-circle-nodes"></i> Swarm </a>
{{ end }} {{ end }}
<a href="swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a> <a href="/swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
</div> </div>
</div> </div>
</div> </div>

View File

@@ -3,8 +3,8 @@
<div class="flex items-center justify-between"> <div class="flex items-center justify-between">
<div class="flex items-center"> <div class="flex items-center">
<!-- Logo Image: Replace 'logo_url_here' with your actual logo URL --> <!-- Logo Image: Replace 'logo_url_here' with your actual logo URL -->
<a href="./" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a> <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
<a href="./" class="text-white text-xl font-bold">LocalAI</a> <a href="/" class="text-white text-xl font-bold">LocalAI</a>
</div> </div>
<!-- Menu button for small screens --> <!-- Menu button for small screens -->
<div class="lg:hidden"> <div class="lg:hidden">
@@ -14,7 +14,7 @@
</div> </div>
<!-- Navigation links --> <!-- Navigation links -->
<div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0"> <div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
<a href="./" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a> <a href="/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
<a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a> <a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
<a href="https://models.localai.io/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a> <a href="https://models.localai.io/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
</div> </div>
@@ -22,7 +22,7 @@
<!-- Collapsible menu for small screens --> <!-- Collapsible menu for small screens -->
<div class="hidden lg:hidden" id="mobile-menu"> <div class="hidden lg:hidden" id="mobile-menu">
<div class="pt-4 pb-3 border-t border-gray-700"> <div class="pt-4 pb-3 border-t border-gray-700">
<a href="./" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-home pr-2"></i>Home</a> <a href="/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-home pr-2"></i>Home</a>
<a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a> <a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
<a href="https://models.localai.io/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a> <a href="https://models.localai.io/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
</div> </div>

View File

@@ -1,7 +1,7 @@
<!doctype html> <!doctype html>
<html lang="en"> <html lang="en">
{{template "views/partials/head" .}} {{template "views/partials/head" .}}
<script defer src="static/talk.js"></script> <script defer src="/static/talk.js"></script>
<style> <style>
body { body {
overflow: hidden; overflow: hidden;

View File

@@ -1,7 +1,7 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en"> <html lang="en">
{{template "views/partials/head" .}} {{template "views/partials/head" .}}
<script defer src="static/image.js"></script> <script defer src="/static/image.js"></script>
<body class="bg-gray-900 text-gray-200"> <body class="bg-gray-900 text-gray-200">
<div class="flex flex-col min-h-screen"> <div class="flex flex-col min-h-screen">
@@ -50,9 +50,9 @@
{{ $model:=.Model}} {{ $model:=.Model}}
{{ range .ModelsConfig }} {{ range .ModelsConfig }}
{{ if eq .Name $model }} {{ if eq .Name $model }}
<option value="text2image/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option> <option value="/text2image/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
{{ else }} {{ else }}
<option value="text2image/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option> <option value="/text2image/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
{{ end }} {{ end }}
{{ end }} {{ end }}
</select> </select>
@@ -62,7 +62,7 @@
<div class="mt-12"> <div class="mt-12">
<input id="image-model" type="hidden" value="{{.Model}}"> <input id="image-model" type="hidden" value="{{.Model}}">
<form id="genimage" action="text2image/{{.Model}}" method="get"> <form id="genimage" action="/text2image/{{.Model}}" method="get">
<input <input
type="text" type="text"
id="input" id="input"

View File

@@ -1,7 +1,7 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en"> <html lang="en">
{{template "views/partials/head" .}} {{template "views/partials/head" .}}
<script defer src="static/tts.js"></script> <script defer src="/static/tts.js"></script>
<body class="bg-gray-900 text-gray-200"> <body class="bg-gray-900 text-gray-200">
<div class="flex flex-col min-h-screen"> <div class="flex flex-col min-h-screen">
@@ -47,9 +47,9 @@
{{ $model:=.Model}} {{ $model:=.Model}}
{{ range .ModelsConfig }} {{ range .ModelsConfig }}
{{ if eq .Name $model }} {{ if eq .Name $model }}
<option value="tts/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option> <option value="/tts/{{.Name}}" selected class="bg-gray-700 text-white">{{.Name}}</option>
{{ else }} {{ else }}
<option value="tts/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option> <option value="/tts/{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
{{ end }} {{ end }}
{{ end }} {{ end }}
</select> </select>
@@ -59,7 +59,7 @@
<div class="mt-12"> <div class="mt-12">
<input id="tts-model" type="hidden" value="{{.Model}}"> <input id="tts-model" type="hidden" value="{{.Model}}">
<form id="tts" action="tts/{{.Model}}" method="get"> <form id="tts" action="/tts/{{.Model}}" method="get">
<input <input
type="text" type="text"
id="input" id="input"

View File

@@ -129,9 +129,6 @@ There are options that can be tweaked or parameters that can be set using enviro
| Environment Variable | Description | | Environment Variable | Description |
|----------------------|-------------| |----------------------|-------------|
| **LOCALAI_P2P** | Set to "true" to enable p2p |
| **LOCALAI_FEDERATED** | Set to "true" to enable federated mode |
| **FEDERATED_SERVER** | Set to "true" to enable federated server |
| **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) | | **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
| **LOCALAI_P2P_ENABLE_LIMITS** | Set to "true" to enable connection limits and resources management (useful when running with poor connectivity or want to limit resources consumption) | | **LOCALAI_P2P_ENABLE_LIMITS** | Set to "true" to enable connection limits and resources management (useful when running with poor connectivity or want to limit resources consumption) |
| **LOCALAI_P2P_LISTEN_MADDRS** | Set to comma separated list of multiaddresses to override default libp2p 0.0.0.0 multiaddresses | | **LOCALAI_P2P_LISTEN_MADDRS** | Set to comma separated list of multiaddresses to override default libp2p 0.0.0.0 multiaddresses |

View File

@@ -16,8 +16,8 @@ LocalAI will attempt to automatically load models which are not explicitly confi
| Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration |
|----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------|
| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes | yes | CUDA, openCL, cuBLAS, Metal | | [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal |
| [llama.cpp's ggml model (backward compatibility with old format, before GGUF)](https://github.com/ggerganov/llama.cpp) ([binding](https://github.com/go-skynet/go-llama.cpp)) | LLama, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes | yes | CUDA, openCL, cuBLAS, Metal | | [llama.cpp's ggml model (backward compatibility with old format, before GGUF)](https://github.com/ggerganov/llama.cpp) ([binding](https://github.com/go-skynet/go-llama.cpp)) | LLama, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal |
| [whisper](https://github.com/ggerganov/whisper.cpp) | whisper | no | Audio | no | no | N/A | | [whisper](https://github.com/ggerganov/whisper.cpp) | whisper | no | Audio | no | no | N/A |
| [stablediffusion](https://github.com/EdVince/Stable-Diffusion-NCNN) ([binding](https://github.com/mudler/go-stable-diffusion)) | stablediffusion | no | Image | no | no | N/A | | [stablediffusion](https://github.com/EdVince/Stable-Diffusion-NCNN) ([binding](https://github.com/mudler/go-stable-diffusion)) | stablediffusion | no | Image | no | no | N/A |
| [langchain-huggingface](https://github.com/tmc/langchaingo) | Any text generators available on HuggingFace through API | yes | GPT | no | no | N/A | | [langchain-huggingface](https://github.com/tmc/langchaingo) | Any text generators available on HuggingFace through API | yes | GPT | no | no | N/A |
@@ -37,11 +37,14 @@ LocalAI will attempt to automatically load models which are not explicitly confi
| `openvoice` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA | | `openvoice` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
| `parler-tts` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA | | `parler-tts` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
| [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API | no | Reranking | no | no | CPU/CUDA | | [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API | no | Reranking | no | no | CPU/CUDA |
| `transformers` | Various GPTs and quantization formats | yes | GPT, embeddings | yes | yes* | CPU/CUDA/XPU | | `transformers` | Various GPTs and quantization formats | yes | GPT, embeddings | yes | yes**** | CPU/CUDA/XPU |
| [bark-cpp](https://github.com/PABannier/bark.cpp) | bark | no | Audio-Only | no | no | yes | | [bark-cpp](https://github.com/PABannier/bark.cpp) | bark | no | Audio-Only | no | no | yes |
| [stablediffusion-cpp](https://github.com/leejet/stable-diffusion.cpp) | stablediffusion-1, stablediffusion-2, stablediffusion-3, flux, PhotoMaker | no | Image | no | no | N/A | | [stablediffusion-cpp](https://github.com/leejet/stable-diffusion.cpp) | stablediffusion-1, stablediffusion-2, stablediffusion-3, flux, PhotoMaker | no | Image | no | no | N/A |
| [silero-vad](https://github.com/snakers4/silero-vad) with [Golang bindings](https://github.com/streamer45/silero-vad-go) | Silero VAD | no | Voice Activity Detection | no | no | CPU | | [silero-vad](https://github.com/snakers4/silero-vad) with [Golang bindings](https://github.com/streamer45/silero-vad-go) | Silero VAD | no | Voice Activity Detection | no | no | CPU |
Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "docs/advanced" %}})). Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "docs/advanced" %}})).
- \* Only for CUDA and OpenVINO CPU/XPU acceleration. - \* 7b ONLY
- ** doesn't seem to be accurate
- *** 7b and 40b with the `ggccv` format, for instance: https://huggingface.co/TheBloke/WizardLM-Uncensored-Falcon-40B-GGML
- **** Only for CUDA and OpenVINO CPU/XPU acceleration.

View File

@@ -1,35 +0,0 @@
+++
disableToc = false
title = "Running on Nvidia ARM64"
weight = 27
+++
LocalAI can be run on Nvidia ARM64 devices, such as the Jetson Nano, Jetson Xavier NX, and Jetson AGX Xavier. The following instructions will guide you through building the LocalAI container for Nvidia ARM64 devices.
## Prerequisites
- Docker engine installed (https://docs.docker.com/engine/install/ubuntu/)
- Nvidia container toolkit installed (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-apt)
## Build the container
Build the LocalAI container for Nvidia ARM64 devices using the following command:
```bash
git clone https://github.com/mudler/LocalAI
cd LocalAI
docker build --build-arg SKIP_DRIVERS=true --build-arg BUILD_TYPE=cublas --build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.4.0 --build-arg IMAGE_TYPE=core -t localai-orin .
```
## Usage
Run the LocalAI container on Nvidia ARM64 devices using the following command, where `/data/models` is the directory containing the models:
```bash
docker run -e DEBUG=true -p 8080:8080 -v /data/models:/build/models -ti --restart=always --name local-ai --runtime nvidia --gpus all localai-orin
```
Note: `/data/models` is the directory containing the models. You can replace it with the directory containing your models.

View File

@@ -1,3 +1,3 @@
{ {
"version": "v2.25.0" "version": "v2.24.2"
} }

View File

@@ -1,40 +0,0 @@
---
# Model definition named "falcon3".
#
# `config_file` embeds a nested YAML document as a literal block scalar; every
# line indented beneath it is part of that embedded document. The embedded
# document defines Go-template prompt templates (`chat_message`, `function`,
# `chat`, `completion`) plus the context size, f16 flag and stop words.
#
# NOTE(review): `chat_message` marks turns as `<|{{ .RoleName }}|>`, while the
# `function` and `chat` templates end with `<|im_start|>assistant`. Confirm
# against the model's published chat template whether `<|assistant|>` was
# intended; the strings are left unchanged here.
name: "falcon3"

config_file: |
  mmap: true
  template:
    chat_message: |
      <|{{ .RoleName }}|>
      {{ if .FunctionCall -}}
      Function call:
      {{ else if eq .RoleName "tool" -}}
      Function response:
      {{ end -}}
      {{ if .Content -}}
      {{.Content }}
      {{ end -}}
      {{ if .FunctionCall -}}
      {{toJson .FunctionCall}}
      {{ end -}}
      {{ if eq .RoleName "assistant" }}<|endoftext|>{{ end }}
    function: |
      <|system|>
      You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
      {{range .Functions}}
      {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
      {{end}}
      For each function call return a json object with function name and arguments
      {{.Input }}
      <|im_start|>assistant
    chat: |
      {{.Input }}
      <|im_start|>assistant
    completion: |
      {{.Input}}
  context_size: 4096
  f16: true
  stopwords:
    - '<|endoftext|>'
    - '<dummy32000>'
    - '</s>'

View File

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More