Compare commits

...

34 Commits

Author SHA1 Message Date
Ettore Di Giacinto
894a30296a feat: unify and propagate CMAKE_ARGS to GGML-based backends
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-11 22:02:58 +01:00
Ettore Di Giacinto
c85f46a71d chore(model gallery): add sailor2-20b-chat (#4365)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-11 10:55:04 +01:00
Ettore Di Giacinto
75b283d83c chore(model gallery): add sailor2-8b-chat (#4364)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-11 10:51:39 +01:00
Ettore Di Giacinto
1918efdfdd chore(model gallery): add sailor2-1b-chat (#4363)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-11 10:32:18 +01:00
LocalAI [bot]
ec239a0cd0 docs: ⬆️ update docs version mudler/LocalAI (#4359)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-11 10:04:16 +01:00
LocalAI [bot]
b74a936178 chore: ⬆️ Update ggerganov/llama.cpp to dafae66cc242eb766797194d3c85c5e502625623 (#4360)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-10 21:45:42 +00:00
Ettore Di Giacinto
de1ddb8ba6 chore(model gallery): add b-nimita-l3-8b-v0.02 (#4357)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-10 09:42:47 +01:00
Ettore Di Giacinto
272763f625 chore(model gallery): add intellect-1-instruct (#4356)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-10 09:42:37 +01:00
Ettore Di Giacinto
3aff87a5cf chore(model gallery): add qwen2.5-math-14b-instruct (#4355)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-10 09:42:24 +01:00
LocalAI [bot]
885118e863 chore: ⬆️ Update ggerganov/llama.cpp to 26a8406ba9198eb6fdd8329fa717555b4f77f05f (#4353)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-10 09:10:58 +01:00
dependabot[bot]
a03a9b9e51 chore(deps): Bump docs/themes/hugo-theme-relearn from be85052 to bd1f3d3 (#4348)
chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `be85052` to `bd1f3d3`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](be85052efe...bd1f3d3432)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-12-09 20:09:26 +00:00
Ettore Di Giacinto
f45d6c746a chore(model gallery): add tulu-3.1-8b-supernova-smart (#4347)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-09 15:58:29 +01:00
Ettore Di Giacinto
5eceb5f67c chore(model gallery): add impish_mind_8b (#4344)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-09 10:24:30 +01:00
Ettore Di Giacinto
a9c0dd3a1e chore(model gallery): add qwen2.5-7b-homeranvita-nerdmix (#4343)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-09 10:24:15 +01:00
LocalAI [bot]
fb17e737f0 docs: ⬆️ update docs version mudler/LocalAI (#4341)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-09 09:19:25 +01:00
LocalAI [bot]
b5a21202ed chore: ⬆️ Update ggerganov/llama.cpp to e52522b8694ae73abf12feb18d29168674aa1c1b (#4342)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-08 22:54:06 +00:00
Ettore Di Giacinto
e147f1bd3e chore(model gallery): add bio-medical-llama-3-8b (#4339)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-08 18:43:26 +01:00
Ettore Di Giacinto
61839efed2 chore(model gallery): add virtuoso-small (#4338)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-08 18:01:25 +01:00
Ettore Di Giacinto
a0fe050055 chore(model gallery): add mn-chunky-lotus-12b (#4337)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-08 18:01:16 +01:00
Ettore Di Giacinto
f943c4b803 Revert "feat: include tokens usage for streamed output" (#4336)
Revert "feat: include tokens usage for streamed output (#4282)"

This reverts commit 0d6c3a7d57.
2024-12-08 17:53:36 +01:00
Ettore Di Giacinto
cea5a0ea42 feat(template): read jinja templates from gguf files (#4332)
* Read jinja templates as fallback

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Move templating out of model loader

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Test TemplateMessages

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Set role and content from transformers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Tests: be more flexible

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* More jinja

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Small refactoring and adaptations

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-08 13:50:33 +01:00
LocalAI [bot]
f5e1527a5a chore: ⬆️ Update ggerganov/llama.cpp to 3573fa8e7b7f0865638b52b4e9b4d2006f0558a2 (#4335)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-07 21:51:45 +00:00
Ettore Di Giacinto
7184ca546f chore(model gallery): add llama-3.3-70b-instruct (#4333)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-07 10:39:20 +01:00
LocalAI [bot]
5592f5e820 chore: ⬆️ Update ggerganov/llama.cpp to c5ede3849fc021174862f9c0bf8273808d8f0d39 (#4330)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-06 21:46:51 +00:00
Ettore Di Giacinto
d4c1746c7d feat(llama.cpp): expose cache_type_k and cache_type_v for quant of kv cache (#4329)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-06 10:23:59 +01:00
LocalAI [bot]
88737e1d76 chore: ⬆️ Update ggerganov/llama.cpp to c9c6e01daedac542b174c235872569fce5385982 (#4328)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-06 09:15:21 +01:00
LocalAI [bot]
ba225f660b docs: ⬆️ update docs version mudler/LocalAI (#4327)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-05 21:54:00 +00:00
Ettore Di Giacinto
3127cd1352 chore(docs): update available backends (#4325)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-05 16:57:56 +01:00
PetrFlegr
b90d78d9f6 Updated links of yamls (#4324)
Updated links

Links to deplyment*.yaml was changed

Signed-off-by: PetrFlegr <ptrflegr@gmail.com>
2024-12-05 16:06:51 +01:00
Ettore Di Giacinto
b86a3e4fa6 chore(model gallery): add math-iio-7b-instruct (#4323)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-05 10:05:35 +01:00
Ettore Di Giacinto
be907d993f chore(model gallery): add loki-v2.6-8b-1024k (#4321)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-05 10:02:02 +01:00
Ettore Di Giacinto
ab0f8648a3 chore(model gallery): add rp-naughty-v1.0c-8b (#4322)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-12-05 10:01:49 +01:00
LocalAI [bot]
c226149503 chore: ⬆️ Update leejet/stable-diffusion.cpp to 9578fdcc4632dc3de5565f28e2fb16b7c18f8d48 (#4320)
⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-05 09:09:11 +01:00
LocalAI [bot]
4a079f893c chore: ⬆️ Update ggerganov/llama.cpp to 59f4db10883a4f3e855cffbf2c3ab68430e95272 (#4319)
⬆️ Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2024-12-04 22:19:35 +00:00
37 changed files with 1459 additions and 874 deletions

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=cc98896db858df7aa40d0e16a505883ef196a482
CPPLLAMA_VERSION?=dafae66cc242eb766797194d3c85c5e502625623
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
@@ -32,7 +32,7 @@ BARKCPP_VERSION?=v1.0.0
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=4570715727f35e5a07a76796d823824c8f42206c
STABLEDIFFUSION_GGML_VERSION?=9578fdcc4632dc3de5565f28e2fb16b7c18f8d48
ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
@@ -40,6 +40,7 @@ ONNX_OS?=linux
export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export GGML_CMAKE_ARGS?=
export CMAKE_ARGS?=
export BACKEND_LIBS?=
@@ -88,9 +89,45 @@ ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
# IF native is false, we add -DGGML_NATIVE=OFF to GGML_CMAKE_ARGS
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
GGML_CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
GGML_CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
# If build type is cublas, then we set -DGGML_CUDA=ON to GGML_CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
GGML_CMAKE_ARGS+=-DGGML_CUDA=ON
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to GGML_CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
GGML_CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
GGML_CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
GGML_CMAKE_ARGS+=-DGGML_HIP=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
GGML_CMAKE_ARGS+=-DGGML_METAL=OFF
else
GGML_CMAKE_ARGS+=-DGGML_METAL=ON
GGML_CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
TARGET+=--target ggml-metal
endif
endif
ifeq ($(BUILD_TYPE),sycl_f16)
GGML_CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),sycl_f32)
GGML_CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif
# Detect if we are running on arm64
@@ -117,7 +154,7 @@ ifeq ($(OS),Darwin)
BUILD_TYPE=metal
# disable metal if on Darwin and any other value is explicitly passed.
else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
GGML_CMAKE_ARGS+=-DGGML_METAL=OFF
export GGML_NO_ACCELERATE=1
export GGML_NO_METAL=1
endif
@@ -142,7 +179,7 @@ ifeq ($(BUILD_TYPE),cublas)
endif
ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1
GGML_CMAKE_ARGS+=-DGGML_VULKAN=1
endif
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
@@ -164,7 +201,7 @@ ifeq ($(BUILD_TYPE),hipblas)
export GGML_HIP=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
GGML_CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
endif
@@ -235,6 +272,8 @@ ifeq ($(BUILD_API_ONLY),true)
GRPC_BACKENDS=
endif
export CMAKE_ARGS?=$(GGML_CMAKE_ARGS)
.PHONY: all test build vendor get-sources prepare-sources prepare
all: help

View File

@@ -242,6 +242,9 @@ message ModelOptions {
repeated float LoraScales = 61;
repeated string Options = 62;
string CacheTypeKey = 63;
string CacheTypeValue = 64;
}
message Result {

View File

@@ -7,42 +7,6 @@ BUILD_TYPE?=
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DGGML_CUDA=ON
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DGGML_HIP=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
else
CMAKE_ARGS+=-DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
TARGET+=--target ggml-metal
endif
endif
ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif
llama.cpp:
mkdir -p llama.cpp
cd llama.cpp && \
@@ -70,7 +34,7 @@ clean: purge
rm -rf llama.cpp
grpc-server: llama.cpp llama.cpp/examples/grpc-server
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
@echo "Building grpc-server for llama.cpp with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"

View File

@@ -2241,6 +2241,12 @@ static void params_parse(const backend::ModelOptions* request,
}
// params.model_alias ??
params.model_alias = request->modelfile();
if (!request->cachetypekey().empty()) {
params.cache_type_k = request->cachetypekey();
}
if (!request->cachetypevalue().empty()) {
params.cache_type_v = request->cachetypevalue();
}
params.n_ctx = request->contextsize();
//params.memory_f16 = request->f16memory();
params.cpuparams.n_threads = request->threads();

View File

@@ -1,38 +0,0 @@
package core
import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model"
)
// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
// Perhaps a proper DI system is worth it in the future, but for now keep things simple.
type Application struct {
// Application-Level Config
ApplicationConfig *config.ApplicationConfig
// ApplicationState *ApplicationState
// Core Low-Level Services
BackendConfigLoader *config.BackendConfigLoader
ModelLoader *model.ModelLoader
// Backend Services
// EmbeddingsBackendService *backend.EmbeddingsBackendService
// ImageGenerationBackendService *backend.ImageGenerationBackendService
// LLMBackendService *backend.LLMBackendService
// TranscriptionBackendService *backend.TranscriptionBackendService
// TextToSpeechBackendService *backend.TextToSpeechBackendService
// LocalAI System Services
BackendMonitorService *services.BackendMonitorService
GalleryService *services.GalleryService
LocalAIMetricsService *services.LocalAIMetricsService
// OpenAIService *services.OpenAIService
}
// TODO [NEXT PR?]: Break up ApplicationConfig.
// Migrate over stuff that is not set via config at all - especially runtime stuff
type ApplicationState struct {
}

View File

@@ -0,0 +1,39 @@
package application
import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
)
type Application struct {
backendLoader *config.BackendConfigLoader
modelLoader *model.ModelLoader
applicationConfig *config.ApplicationConfig
templatesEvaluator *templates.Evaluator
}
func newApplication(appConfig *config.ApplicationConfig) *Application {
return &Application{
backendLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
modelLoader: model.NewModelLoader(appConfig.ModelPath),
applicationConfig: appConfig,
templatesEvaluator: templates.NewEvaluator(appConfig.ModelPath),
}
}
func (a *Application) BackendLoader() *config.BackendConfigLoader {
return a.backendLoader
}
func (a *Application) ModelLoader() *model.ModelLoader {
return a.modelLoader
}
func (a *Application) ApplicationConfig() *config.ApplicationConfig {
return a.applicationConfig
}
func (a *Application) TemplatesEvaluator() *templates.Evaluator {
return a.templatesEvaluator
}

View File

@@ -1,4 +1,4 @@
package startup
package application
import (
"encoding/json"
@@ -8,8 +8,8 @@ import (
"path/filepath"
"time"
"github.com/fsnotify/fsnotify"
"dario.cat/mergo"
"github.com/fsnotify/fsnotify"
"github.com/mudler/LocalAI/core/config"
"github.com/rs/zerolog/log"
)

View File

@@ -1,15 +1,15 @@
package startup
package application
import (
"fmt"
"os"
"github.com/mudler/LocalAI/core"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/pkg/assets"
"github.com/mudler/LocalAI/pkg/library"
"github.com/mudler/LocalAI/pkg/model"
pkgStartup "github.com/mudler/LocalAI/pkg/startup"
@@ -17,8 +17,9 @@ import (
"github.com/rs/zerolog/log"
)
func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
func New(opts ...config.AppOption) (*Application, error) {
options := config.NewApplicationConfig(opts...)
application := newApplication(options)
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath)
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
@@ -36,28 +37,28 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
// Make sure directories exists
if options.ModelPath == "" {
return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty")
return nil, fmt.Errorf("options.ModelPath cannot be empty")
}
err = os.MkdirAll(options.ModelPath, 0750)
if err != nil {
return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err)
return nil, fmt.Errorf("unable to create ModelPath: %q", err)
}
if options.ImageDir != "" {
err := os.MkdirAll(options.ImageDir, 0750)
if err != nil {
return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err)
return nil, fmt.Errorf("unable to create ImageDir: %q", err)
}
}
if options.AudioDir != "" {
err := os.MkdirAll(options.AudioDir, 0750)
if err != nil {
return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err)
return nil, fmt.Errorf("unable to create AudioDir: %q", err)
}
}
if options.UploadDir != "" {
err := os.MkdirAll(options.UploadDir, 0750)
if err != nil {
return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err)
return nil, fmt.Errorf("unable to create UploadDir: %q", err)
}
}
@@ -65,39 +66,36 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
log.Error().Err(err).Msg("error installing models")
}
cl := config.NewBackendConfigLoader(options.ModelPath)
ml := model.NewModelLoader(options.ModelPath)
configLoaderOpts := options.ToConfigLoaderOptions()
if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
if err := application.BackendLoader().LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
log.Error().Err(err).Msg("error loading config files")
}
if options.ConfigFile != "" {
if err := cl.LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil {
if err := application.BackendLoader().LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil {
log.Error().Err(err).Msg("error loading config file")
}
}
if err := cl.Preload(options.ModelPath); err != nil {
if err := application.BackendLoader().Preload(options.ModelPath); err != nil {
log.Error().Err(err).Msg("error downloading models")
}
if options.PreloadJSONModels != "" {
if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil {
return nil, nil, nil, err
return nil, err
}
}
if options.PreloadModelsFromPath != "" {
if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil {
return nil, nil, nil, err
return nil, err
}
}
if options.Debug {
for _, v := range cl.GetAllBackendConfigs() {
for _, v := range application.BackendLoader().GetAllBackendConfigs() {
log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v)
}
}
@@ -123,7 +121,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
go func() {
<-options.Context.Done()
log.Debug().Msgf("Context canceled, shutting down")
err := ml.StopAllGRPC()
err := application.ModelLoader().StopAllGRPC()
if err != nil {
log.Error().Err(err).Msg("error while stopping all grpc backends")
}
@@ -131,12 +129,12 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
if options.WatchDog {
wd := model.NewWatchDog(
ml,
application.ModelLoader(),
options.WatchDogBusyTimeout,
options.WatchDogIdleTimeout,
options.WatchDogBusy,
options.WatchDogIdle)
ml.SetWatchDog(wd)
application.ModelLoader().SetWatchDog(wd)
go wd.Run()
go func() {
<-options.Context.Done()
@@ -147,7 +145,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
if options.LoadToMemory != nil {
for _, m := range options.LoadToMemory {
cfg, err := cl.LoadBackendConfigFileByName(m, options.ModelPath,
cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath,
config.LoadOptionDebug(options.Debug),
config.LoadOptionThreads(options.Threads),
config.LoadOptionContextSize(options.ContextSize),
@@ -155,7 +153,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
config.ModelPath(options.ModelPath),
)
if err != nil {
return nil, nil, nil, err
return nil, err
}
log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model)
@@ -163,9 +161,9 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
o := backend.ModelOptions(*cfg, options)
var backendErr error
_, backendErr = ml.Load(o...)
_, backendErr = application.ModelLoader().Load(o...)
if backendErr != nil {
return nil, nil, nil, err
return nil, err
}
}
}
@@ -174,7 +172,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
startWatcher(options)
log.Info().Msg("core/startup process completed!")
return cl, ml, options, nil
return application, nil
}
func startWatcher(options *config.ApplicationConfig) {
@@ -201,32 +199,3 @@ func startWatcher(options *config.ApplicationConfig) {
log.Error().Err(err).Msg("failed creating watcher")
}
}
// In Lieu of a proper DI framework, this function wires up the Application manually.
// This is in core/startup rather than core/state.go to keep package references clean!
func createApplication(appConfig *config.ApplicationConfig) *core.Application {
app := &core.Application{
ApplicationConfig: appConfig,
BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
ModelLoader: model.NewModelLoader(appConfig.ModelPath),
}
var err error
// app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
// app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
// app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
// app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
// app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
app.GalleryService = services.NewGalleryService(app.ApplicationConfig)
// app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService)
app.LocalAIMetricsService, err = services.NewLocalAIMetricsService()
if err != nil {
log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.")
}
return app
}

View File

@@ -117,12 +117,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
ss := ""
var partialRune []byte
err := inferenceModel.PredictStream(ctx, opts, func(reply *proto.Reply) {
msg := reply.GetMessage()
partialRune = append(partialRune, msg...)
tokenUsage.Prompt = int(reply.PromptTokens)
tokenUsage.Completion = int(reply.Tokens)
err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
partialRune = append(partialRune, chars...)
for len(partialRune) > 0 {
r, size := utf8.DecodeRune(partialRune)
@@ -136,10 +132,6 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
partialRune = partialRune[size:]
}
if len(msg) == 0 {
tokenCallback("", tokenUsage)
}
})
return LLMResponse{
Response: ss,

View File

@@ -151,6 +151,8 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
TensorParallelSize: int32(c.TensorParallelSize),
MMProj: c.MMProj,
FlashAttention: c.FlashAttention,
CacheTypeKey: c.CacheTypeK,
CacheTypeValue: c.CacheTypeV,
NoKVOffload: c.NoKVOffloading,
YarnExtFactor: c.YarnExtFactor,
YarnAttnFactor: c.YarnAttnFactor,

View File

@@ -6,12 +6,12 @@ import (
"strings"
"time"
"github.com/mudler/LocalAI/core/application"
cli_api "github.com/mudler/LocalAI/core/cli/api"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/startup"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
@@ -186,16 +186,16 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
}
if r.PreloadBackendOnly {
_, _, _, err := startup.Startup(opts...)
_, err := application.New(opts...)
return err
}
cl, ml, options, err := startup.Startup(opts...)
app, err := application.New(opts...)
if err != nil {
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
}
appHTTP, err := http.App(cl, ml, options)
appHTTP, err := http.API(app)
if err != nil {
log.Error().Err(err).Msg("error during HTTP App construction")
return err

View File

@@ -155,8 +155,10 @@ type LLMConfig struct {
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
MMProj string `yaml:"mmproj"`
FlashAttention bool `yaml:"flash_attention"`
NoKVOffloading bool `yaml:"no_kv_offloading"`
FlashAttention bool `yaml:"flash_attention"`
NoKVOffloading bool `yaml:"no_kv_offloading"`
CacheTypeK string `yaml:"cache_type_k"`
CacheTypeV string `yaml:"cache_type_v"`
RopeScaling string `yaml:"rope_scaling"`
ModelType string `yaml:"type"`
@@ -204,6 +206,8 @@ type TemplateConfig struct {
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
Multimodal string `yaml:"multimodal"`
JinjaTemplate bool `yaml:"jinja_template"`
}
func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {

View File

@@ -26,14 +26,14 @@ const (
type settingsConfig struct {
StopWords []string
TemplateConfig TemplateConfig
RepeatPenalty float64
RepeatPenalty float64
}
// default settings to adopt with a given model family
var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
Gemma: {
RepeatPenalty: 1.0,
StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
TemplateConfig: TemplateConfig{
Chat: "{{.Input }}\n<start_of_turn>model\n",
ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
@@ -200,6 +200,18 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
} else {
log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
}
if cfg.HasTemplate() {
return
}
// identify from well known templates first, otherwise use the raw jinja template
chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
if found {
// try to use the jinja template
cfg.TemplateConfig.JinjaTemplate = true
cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
}
}
func identifyFamily(f *gguf.GGUFFile) familyType {

View File

@@ -14,10 +14,9 @@ import (
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/http/routes"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model"
"github.com/gofiber/contrib/fiberzerolog"
"github.com/gofiber/fiber/v2"
@@ -49,18 +48,18 @@ var embedDirStatic embed.FS
// @in header
// @name Authorization
func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
func API(application *application.Application) (*fiber.App, error) {
fiberCfg := fiber.Config{
Views: renderEngine(),
BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
BodyLimit: application.ApplicationConfig().UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
// We disable the Fiber startup message as it does not conform to structured logging.
// We register a startup log line with connection information in the OnListen hook to keep things user friendly though
DisableStartupMessage: true,
// Override default error handler
}
if !appConfig.OpaqueErrors {
if !application.ApplicationConfig().OpaqueErrors {
// Normally, return errors as JSON responses
fiberCfg.ErrorHandler = func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
@@ -86,9 +85,9 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
}
}
app := fiber.New(fiberCfg)
router := fiber.New(fiberCfg)
app.Hooks().OnListen(func(listenData fiber.ListenData) error {
router.Hooks().OnListen(func(listenData fiber.ListenData) error {
scheme := "http"
if listenData.TLS {
scheme = "https"
@@ -99,82 +98,82 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
// Have Fiber use zerolog like the rest of the application rather than it's built-in logger
logger := log.Logger
app.Use(fiberzerolog.New(fiberzerolog.Config{
router.Use(fiberzerolog.New(fiberzerolog.Config{
Logger: &logger,
}))
// Default middleware config
if !appConfig.Debug {
app.Use(recover.New())
if !application.ApplicationConfig().Debug {
router.Use(recover.New())
}
if !appConfig.DisableMetrics {
if !application.ApplicationConfig().DisableMetrics {
metricsService, err := services.NewLocalAIMetricsService()
if err != nil {
return nil, err
}
if metricsService != nil {
app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
app.Hooks().OnShutdown(func() error {
router.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
router.Hooks().OnShutdown(func() error {
return metricsService.Shutdown()
})
}
}
// Health Checks should always be exempt from auth, so register these first
routes.HealthRoutes(app)
routes.HealthRoutes(router)
kaConfig, err := middleware.GetKeyAuthConfig(appConfig)
kaConfig, err := middleware.GetKeyAuthConfig(application.ApplicationConfig())
if err != nil || kaConfig == nil {
return nil, fmt.Errorf("failed to create key auth config: %w", err)
}
// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration
app.Use(v2keyauth.New(*kaConfig))
router.Use(v2keyauth.New(*kaConfig))
if appConfig.CORS {
if application.ApplicationConfig().CORS {
var c func(ctx *fiber.Ctx) error
if appConfig.CORSAllowOrigins == "" {
if application.ApplicationConfig().CORSAllowOrigins == "" {
c = cors.New()
} else {
c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig().CORSAllowOrigins})
}
app.Use(c)
router.Use(c)
}
if appConfig.CSRF {
if application.ApplicationConfig().CSRF {
log.Debug().Msg("Enabling CSRF middleware. Tokens are now required for state-modifying requests")
app.Use(csrf.New())
router.Use(csrf.New())
}
// Load config jsons
utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
utils.LoadConfig(application.ApplicationConfig().UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
galleryService := services.NewGalleryService(appConfig)
galleryService.Start(appConfig.Context, cl)
galleryService := services.NewGalleryService(application.ApplicationConfig())
galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig)
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService)
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig)
if !appConfig.DisableWebUI {
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService)
routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
routes.RegisterOpenAIRoutes(router, application)
if !application.ApplicationConfig().DisableWebUI {
routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
}
routes.RegisterJINARoutes(app, cl, ml, appConfig)
routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
httpFS := http.FS(embedDirStatic)
app.Use(favicon.New(favicon.Config{
router.Use(favicon.New(favicon.Config{
URL: "/favicon.ico",
FileSystem: httpFS,
File: "static/favicon.ico",
}))
app.Use("/static", filesystem.New(filesystem.Config{
router.Use("/static", filesystem.New(filesystem.Config{
Root: httpFS,
PathPrefix: "static",
Browse: true,
@@ -182,7 +181,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
// Define a custom 404 handler
// Note: keep this at the bottom!
app.Use(notFoundHandler)
router.Use(notFoundHandler)
return app, nil
return router, nil
}

View File

@@ -12,15 +12,14 @@ import (
"path/filepath"
"runtime"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/config"
. "github.com/mudler/LocalAI/core/http"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/startup"
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"gopkg.in/yaml.v3"
@@ -252,9 +251,6 @@ var _ = Describe("API test", func() {
var cancel context.CancelFunc
var tmpdir string
var modelDir string
var bcl *config.BackendConfigLoader
var ml *model.ModelLoader
var applicationConfig *config.ApplicationConfig
commonOpts := []config.AppOption{
config.WithDebug(true),
@@ -300,7 +296,7 @@ var _ = Describe("API test", func() {
},
}
bcl, ml, applicationConfig, err = startup.Startup(
application, err := application.New(
append(commonOpts,
config.WithContext(c),
config.WithGalleries(galleries),
@@ -310,7 +306,7 @@ var _ = Describe("API test", func() {
config.WithBackendAssetsOutput(backendAssetsDir))...)
Expect(err).ToNot(HaveOccurred())
app, err = App(bcl, ml, applicationConfig)
app, err = API(application)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -539,7 +535,7 @@ var _ = Describe("API test", func() {
var res map[string]string
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
@@ -641,7 +637,7 @@ var _ = Describe("API test", func() {
},
}
bcl, ml, applicationConfig, err = startup.Startup(
application, err := application.New(
append(commonOpts,
config.WithContext(c),
config.WithAudioDir(tmpdir),
@@ -652,7 +648,7 @@ var _ = Describe("API test", func() {
config.WithBackendAssetsOutput(tmpdir))...,
)
Expect(err).ToNot(HaveOccurred())
app, err = App(bcl, ml, applicationConfig)
app, err = API(application)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -772,14 +768,14 @@ var _ = Describe("API test", func() {
var err error
bcl, ml, applicationConfig, err = startup.Startup(
application, err := application.New(
append(commonOpts,
config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
config.WithContext(c),
config.WithModelPath(modelPath),
)...)
Expect(err).ToNot(HaveOccurred())
app, err = App(bcl, ml, applicationConfig)
app, err = API(application)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -990,14 +986,14 @@ var _ = Describe("API test", func() {
c, cancel = context.WithCancel(context.Background())
var err error
bcl, ml, applicationConfig, err = startup.Startup(
application, err := application.New(
append(commonOpts,
config.WithContext(c),
config.WithModelPath(modelPath),
config.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
)
Expect(err).ToNot(HaveOccurred())
app, err = App(bcl, ml, applicationConfig)
app, err = API(application)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")

View File

@@ -14,6 +14,8 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/templates"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
@@ -24,7 +26,7 @@ import (
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/chat/completions [post]
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
var id, textContentToReturn string
var created int
@@ -39,15 +41,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
responses <- initialMessage
ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
choices := []schema.Choice{}
if s != "" {
choices = append(choices, schema.Choice{Delta: &schema.Message{Content: &s}, Index: 0})
}
resp := schema.OpenAIResponse{
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: choices,
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
Object: "chat.completion.chunk",
Usage: schema.OpenAIUsage{
PromptTokens: usage.Prompt,
@@ -298,148 +296,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
// If we are using the tokenizer template, we don't need to process the messages
// unless we are processing functions
if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn {
suppressConfigSystemPrompt := false
mess := []string{}
for messageIndex, i := range input.Messages {
var content string
role := i.Role
// if function call, we might want to customize the role so we can display better that the "assistant called a json action"
// if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
roleFn := "assistant_function_call"
r := config.Roles[roleFn]
if r != "" {
role = roleFn
}
}
r := config.Roles[role]
contentExists := i.Content != nil && i.StringContent != ""
fcall := i.FunctionCall
if len(i.ToolCalls) > 0 {
fcall = i.ToolCalls
}
// First attempt to populate content via a chat message specific template
if config.TemplateConfig.ChatMessage != "" {
chatMessageData := model.ChatMessageTemplateData{
SystemPrompt: config.SystemPrompt,
Role: r,
RoleName: role,
Content: i.StringContent,
FunctionCall: fcall,
FunctionName: i.Name,
LastMessage: messageIndex == (len(input.Messages) - 1),
Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
MessageIndex: messageIndex,
}
templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
if err != nil {
log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping")
} else {
if templatedChatMessage == "" {
log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
}
log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
content = templatedChatMessage
}
}
marshalAnyRole := func(f any) {
j, err := json.Marshal(f)
if err == nil {
if contentExists {
content += "\n" + fmt.Sprint(r, " ", string(j))
} else {
content = fmt.Sprint(r, " ", string(j))
}
}
}
marshalAny := func(f any) {
j, err := json.Marshal(f)
if err == nil {
if contentExists {
content += "\n" + string(j)
} else {
content = string(j)
}
}
}
// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
if content == "" {
if r != "" {
if contentExists {
content = fmt.Sprint(r, i.StringContent)
}
if i.FunctionCall != nil {
marshalAnyRole(i.FunctionCall)
}
if i.ToolCalls != nil {
marshalAnyRole(i.ToolCalls)
}
} else {
if contentExists {
content = fmt.Sprint(i.StringContent)
}
if i.FunctionCall != nil {
marshalAny(i.FunctionCall)
}
if i.ToolCalls != nil {
marshalAny(i.ToolCalls)
}
}
// Special Handling: System. We care if it was printed at all, not the r branch, so check seperately
if contentExists && role == "system" {
suppressConfigSystemPrompt = true
}
}
mess = append(mess, content)
}
joinCharacter := "\n"
if config.TemplateConfig.JoinChatMessagesByCharacter != nil {
joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter
}
predInput = strings.Join(mess, joinCharacter)
log.Debug().Msgf("Prompt (before templating): %s", predInput)
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Chat != "" && !shouldUseFn {
templateFile = config.TemplateConfig.Chat
}
if config.TemplateConfig.Functions != "" && shouldUseFn {
templateFile = config.TemplateConfig.Functions
}
if templateFile != "" {
templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
SuppressSystemPrompt: suppressConfigSystemPrompt,
Input: predInput,
Functions: funcs,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
} else {
log.Debug().Msgf("Template failed loading: %s", err.Error())
}
}
predInput = evaluator.TemplateMessages(input.Messages, config, funcs, shouldUseFn)
log.Debug().Msgf("Prompt (after templating): %s", predInput)
if shouldUseFn && config.Grammar != "" {
if config.Grammar != "" {
log.Debug().Msgf("Grammar: %+v", config.Grammar)
}
}
@@ -469,9 +329,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
toolsCalled := false
for ev := range responses {
usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it
if len(ev.Choices) == 0 {
break
}
if len(ev.Choices[0].Delta.ToolCalls) > 0 {
toolsCalled = true
}

View File

@@ -16,6 +16,7 @@ import (
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
@@ -25,7 +26,7 @@ import (
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/completions [post]
func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
id := uuid.New().String()
created := int(time.Now().Unix())
@@ -94,17 +95,6 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
c.Set("Transfer-Encoding", "chunked")
}
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Completion != "" {
templateFile = config.TemplateConfig.Completion
}
if input.Stream {
if len(config.PromptStrings) > 1 {
return errors.New("cannot handle more than 1 `PromptStrings` when Streaming")
@@ -112,15 +102,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
predInput := config.PromptStrings[0]
if templateFile != "" {
templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
Input: predInput,
SystemPrompt: config.SystemPrompt,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
}
templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{
Input: predInput,
SystemPrompt: config.SystemPrompt,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
}
responses := make(chan schema.OpenAIResponse)
@@ -165,16 +153,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
totalTokenUsage := backend.TokenUsage{}
for k, i := range config.PromptStrings {
if templateFile != "" {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
Input: i,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
Input: i,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
r, tokenUsage, err := ComputeChoices(

View File

@@ -12,6 +12,7 @@ import (
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/schema"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/rs/zerolog/log"
)
@@ -21,7 +22,8 @@ import (
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/edits [post]
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
if err != nil {
@@ -35,31 +37,18 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConf
log.Debug().Msgf("Parameter Config: %+v", config)
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Edit != "" {
templateFile = config.TemplateConfig.Edit
}
var result []schema.Choice
totalTokenUsage := backend.TokenUsage{}
for _, i := range config.InputStrings {
if templateFile != "" {
templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
Input: i,
Instruction: input.Instruction,
SystemPrompt: config.SystemPrompt,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.EditPromptTemplate, *config, templates.PromptTemplateData{
Input: i,
Instruction: input.Instruction,
SystemPrompt: config.SystemPrompt,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {

View File

@@ -11,62 +11,62 @@ import (
"github.com/mudler/LocalAI/pkg/model"
)
func RegisterLocalAIRoutes(app *fiber.App,
func RegisterLocalAIRoutes(router *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
galleryService *services.GalleryService) {
app.Get("/swagger/*", swagger.HandlerDefault) // default
router.Get("/swagger/*", swagger.HandlerDefault) // default
// LocalAI API endpoints
if !appConfig.DisableGalleryEndpoint {
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
app.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
app.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())
router.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
router.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())
app.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
app.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
app.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint())
app.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint())
app.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
app.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
router.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
router.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
router.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint())
router.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint())
router.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
}
app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
app.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))
router.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
router.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))
// Stores
sl := model.NewModelLoader("")
app.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
app.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
if !appConfig.DisableMetrics {
app.Get("/metrics", localai.LocalAIMetricsEndpoint())
router.Get("/metrics", localai.LocalAIMetricsEndpoint())
}
// Experimental Backend Statistics Module
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
// p2p
if p2p.IsP2PEnabled() {
app.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
app.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
}
app.Get("/version", func(c *fiber.Ctx) error {
router.Get("/version", func(c *fiber.Ctx) error {
return c.JSON(struct {
Version string `json:"version"`
}{Version: internal.PrintableVersion()})
})
app.Get("/system", localai.SystemInformations(ml, appConfig))
router.Get("/system", localai.SystemInformations(ml, appConfig))
// misc
app.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
router.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
}

View File

@@ -2,84 +2,134 @@ package routes
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/endpoints/openai"
"github.com/mudler/LocalAI/pkg/model"
)
func RegisterOpenAIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig) {
application *application.Application) {
// openAI compatible API endpoint
// chat
app.Post("/v1/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
app.Post("/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
app.Post("/v1/chat/completions",
openai.ChatEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/chat/completions",
openai.ChatEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
// edit
app.Post("/v1/edits", openai.EditEndpoint(cl, ml, appConfig))
app.Post("/edits", openai.EditEndpoint(cl, ml, appConfig))
app.Post("/v1/edits",
openai.EditEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/edits",
openai.EditEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
// assistant
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
// files
app.Post("/v1/files", openai.UploadFilesEndpoint(cl, appConfig))
app.Post("/files", openai.UploadFilesEndpoint(cl, appConfig))
app.Get("/v1/files", openai.ListFilesEndpoint(cl, appConfig))
app.Get("/files", openai.ListFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
app.Get("/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
app.Post("/v1/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Post("/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/v1/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
// completion
app.Post("/v1/completions", openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/completions", openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/completions", openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/v1/completions",
openai.CompletionEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/completions",
openai.CompletionEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/v1/engines/:model/completions",
openai.CompletionEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
// embeddings
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
// audio
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(cl, ml, appConfig))
app.Post("/v1/audio/speech", localai.TTSEndpoint(cl, ml, appConfig))
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/audio/speech", localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
// images
app.Post("/v1/images/generations", openai.ImageEndpoint(cl, ml, appConfig))
app.Post("/v1/images/generations", openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
if appConfig.ImageDir != "" {
app.Static("/generated-images", appConfig.ImageDir)
if application.ApplicationConfig().ImageDir != "" {
app.Static("/generated-images", application.ApplicationConfig().ImageDir)
}
if appConfig.AudioDir != "" {
app.Static("/generated-audio", appConfig.AudioDir)
if application.ApplicationConfig().AudioDir != "" {
app.Static("/generated-audio", application.ApplicationConfig().AudioDir)
}
// List models
app.Get("/v1/models", openai.ListModelsEndpoint(cl, ml))
app.Get("/models", openai.ListModelsEndpoint(cl, ml))
app.Get("/v1/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader()))
app.Get("/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader()))
}

View File

@@ -10,13 +10,13 @@ ico = "rocket_launch"
For installing LocalAI in Kubernetes, the deployment file from the `examples` can be used and customized as prefered:
```
kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI/master/examples/kubernetes/deployment.yaml
kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI-examples/refs/heads/main/kubernetes/deployment.yaml
```
For Nvidia GPUs:
```
kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI/master/examples/kubernetes/deployment-nvidia.yaml
kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI-examples/refs/heads/main/kubernetes/deployment-nvidia.yaml
```
Alternatively, the [helm chart](https://github.com/go-skynet/helm-charts) can be used as well:

View File

@@ -6,7 +6,7 @@ weight = 24
url = "/model-compatibility/"
+++
Besides llama based models, LocalAI is compatible also with other architectures. The table below lists all the compatible models families and the associated binding repository.
Besides llama based models, LocalAI is compatible also with other architectures. The table below lists all the backends, compatible models families and the associated repository.
{{% alert note %}}
@@ -16,19 +16,8 @@ LocalAI will attempt to automatically load models which are not explicitly confi
| Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration |
|----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------|
| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | Vicuna, Alpaca, LLaMa, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal |
| [gpt4all-llama](https://github.com/nomic-ai/gpt4all) | Vicuna, Alpaca, LLaMa | yes | GPT | no | yes | N/A |
| [gpt4all-mpt](https://github.com/nomic-ai/gpt4all) | MPT | yes | GPT | no | yes | N/A |
| [gpt4all-j](https://github.com/nomic-ai/gpt4all) | GPT4ALL-J | yes | GPT | no | yes | N/A |
| [falcon-ggml](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Falcon (*) | yes | GPT | no | no | N/A |
| [dolly](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Dolly | yes | GPT | no | no | N/A |
| [gptj](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPTJ | yes | GPT | no | no | N/A |
| [mpt](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | MPT | yes | GPT | no | no | N/A |
| [replit](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Replit | yes | GPT | no | no | N/A |
| [gptneox](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT NeoX, RedPajama, StableLM | yes | GPT | no | no | N/A |
| [bloomz](https://github.com/NouamaneTazi/bloomz.cpp) ([binding](https://github.com/go-skynet/bloomz.cpp)) | Bloom | yes | GPT | no | no | N/A |
| [rwkv](https://github.com/saharNooby/rwkv.cpp) ([binding](https://github.com/donomii/go-rwkv.cpp)) | rwkv | yes | GPT | no | yes | N/A |
| [bert](https://github.com/skeskinen/bert.cpp) ([binding](https://github.com/go-skynet/go-bert.cpp)) | bert | no | Embeddings only | yes | no | N/A |
| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal |
| [llama.cpp's ggml model (backward compatibility with old format, before GGUF)](https://github.com/ggerganov/llama.cpp) ([binding](https://github.com/go-skynet/go-llama.cpp)) | LLama, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal |
| [whisper](https://github.com/ggerganov/whisper.cpp) | whisper | no | Audio | no | no | N/A |
| [stablediffusion](https://github.com/EdVince/Stable-Diffusion-NCNN) ([binding](https://github.com/mudler/go-stable-diffusion)) | stablediffusion | no | Image | no | no | N/A |
| [langchain-huggingface](https://github.com/tmc/langchaingo) | Any text generators available on HuggingFace through API | yes | GPT | no | no | N/A |
@@ -40,11 +29,18 @@ LocalAI will attempt to automatically load models which are not explicitly confi
| `diffusers` | SD,... | no | Image generation | no | no | N/A |
| `vall-e-x` | Vall-E | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
| `vllm` | Various GPTs and quantization formats | yes | GPT | no | no | CPU/CUDA |
| `mamba` | Mamba models architecture | yes | GPT | no | no | CPU/CUDA |
| `exllama2` | GPTQ | yes | GPT only | no | no | N/A |
| `transformers-musicgen` | | no | Audio generation | no | no | N/A |
| [tinydream](https://github.com/symisc/tiny-dream#tiny-dreaman-embedded-header-only-stable-diffusion-inference-c-librarypixlabiotiny-dream) | stablediffusion | no | Image | no | no | N/A |
| `coqui` | Coqui | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
| `openvoice` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
| `parler-tts` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
| [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API | no | Reranking | no | no | CPU/CUDA |
| `transformers` | Various GPTs and quantization formats | yes | GPT, embeddings | yes | yes**** | CPU/CUDA/XPU |
| [bark-cpp](https://github.com/PABannier/bark.cpp) | bark | no | Audio-Only | no | no | yes |
| [stablediffusion-cpp](https://github.com/leejet/stable-diffusion.cpp) | stablediffusion-1, stablediffusion-2, stablediffusion-3, flux, PhotoMaker | no | Image | no | no | N/A |
| [silero-vad](https://github.com/snakers4/silero-vad) with [Golang bindings](https://github.com/streamer45/silero-vad-go) | Silero VAD | no | Voice Activity Detection | no | no | CPU |
Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "docs/advanced" %}})).

View File

@@ -1,3 +1,3 @@
{
"version": "v2.23.0"
"version": "v2.24.2"
}

View File

@@ -1,4 +1,52 @@
---
- &intellect1
name: "intellect-1-instruct"
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
icon: https://huggingface.co/PrimeIntellect/INTELLECT-1-Instruct/resolve/main/intellect-1-map.png
urls:
- https://huggingface.co/PrimeIntellect/INTELLECT-1-Instruct
- https://huggingface.co/bartowski/INTELLECT-1-Instruct-GGUF
tags:
- llm
- gguf
- gpu
- cpu
- intellect
license: apache-2.0
description: |
INTELLECT-1 is the first collaboratively trained 10 billion parameter language model trained from scratch on 1 trillion tokens of English text and code.
This is an instruct model. The base model associated with it is INTELLECT-1.
INTELLECT-1 was trained on up to 14 concurrent nodes distributed across 3 continents, with contributions from 30 independent community contributors providing compute. The training code utilizes the prime framework, a scalable distributed training framework designed for fault-tolerant, dynamically scaling, high-perfomance training on unreliable, globally distributed workers. The key abstraction that allows dynamic scaling is the ElasticDeviceMesh which manages dynamic global process groups for fault-tolerant communication across the internet and local process groups for communication within a node. The model was trained using the DiLoCo algorithms with 100 inner steps. The global all-reduce was done with custom int8 all-reduce kernels to reduce the communication payload required, greatly reducing the communication overhead by a factor 400x.
overrides:
parameters:
model: INTELLECT-1-Instruct-Q4_K_M.gguf
files:
- filename: INTELLECT-1-Instruct-Q4_K_M.gguf
sha256: 5df236fe570e5998d07fb3207788eac811ef3b77dd2a0ad04a2ef5c6361f3030
uri: huggingface://bartowski/INTELLECT-1-Instruct-GGUF/INTELLECT-1-Instruct-Q4_K_M.gguf
- &llama33
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
license: llama3.3
description: |
The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.
tags:
- llm
- gguf
- gpu
- cpu
- llama3.3
name: "llama-3.3-70b-instruct"
urls:
- https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct
- https://huggingface.co/MaziyarPanahi/Llama-3.3-70B-Instruct-GGUF
overrides:
parameters:
model: Llama-3.3-70B-Instruct.Q4_K_M.gguf
files:
- filename: Llama-3.3-70B-Instruct.Q4_K_M.gguf
sha256: 4f3b04ecae278bdb0fd545b47c210bc5edf823e5ebf7d41e0b526c81d54b1ff3
uri: huggingface://MaziyarPanahi/Llama-3.3-70B-Instruct-GGUF/Llama-3.3-70B-Instruct.Q4_K_M.gguf
- &rwkv
url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
name: "rwkv-6-world-7b"
@@ -1791,6 +1839,119 @@
- filename: HomerCreativeAnvita-Mix-Qw7B.Q4_K_M.gguf
sha256: a356f279a104bff0bbc2ef7ec136c1e774153de8893bf988083e96fb7f4bc053
uri: huggingface://QuantFactory/HomerCreativeAnvita-Mix-Qw7B-GGUF/HomerCreativeAnvita-Mix-Qw7B.Q4_K_M.gguf
- !!merge <<: *qwen25
name: "math-iio-7b-instruct"
icon: https://cdn-uploads.huggingface.co/production/uploads/65bb837dbfb878f46c77de4c/faLfR-doaWP_BLUkOQrbq.png
urls:
- https://huggingface.co/prithivMLmods/Math-IIO-7B-Instruct
- https://huggingface.co/QuantFactory/Math-IIO-7B-Instruct-GGUF
description: |
The Math IIO 7B Instruct is a fine-tuned language model based on the robust Qwen2.5-7B-Instruct architecture. This model has been specifically trained to excel in single-shot mathematical reasoning and instruction-based tasks, making it a reliable choice for educational, analytical, and problem-solving applications.
Key Features:
Math-Optimized Capabilities:
The model is designed to handle complex mathematical problems, step-by-step calculations, and reasoning tasks.
Instruction-Tuned:
Fine-tuned for better adherence to structured queries and task-oriented prompts, enabling clear and concise outputs.
Large Vocabulary:
Equipped with an extensive tokenizer configuration and custom tokens to ensure precise mathematical notation support.
overrides:
parameters:
model: Math-IIO-7B-Instruct.Q4_K_M.gguf
files:
- filename: Math-IIO-7B-Instruct.Q4_K_M.gguf
sha256: 8ffda0b6a43eb9997dfd7db48fe3bd0970fd1b9b86fb68f082c38622a48b58f4
uri: huggingface://QuantFactory/Math-IIO-7B-Instruct-GGUF/Math-IIO-7B-Instruct.Q4_K_M.gguf
- !!merge <<: *qwen25
name: "virtuoso-small"
icon: https://i.ibb.co/pXD6Bcv/SW2-U-g-QQLSH1-ZAbxhs-Iu-A.webp
urls:
- https://huggingface.co/arcee-ai/Virtuoso-Small-GGUF
description: |
Virtuoso-Small is the debut public release of the Virtuoso series of models by Arcee.ai, designed to bring cutting-edge generative AI capabilities to organizations and developers in a compact, efficient form. With 14 billion parameters, Virtuoso-Small is an accessible entry point for high-quality instruction-following, complex reasoning, and business-oriented generative AI tasks.
overrides:
parameters:
model: Virtuoso-Small-Q4_K_M.gguf
files:
- filename: Virtuoso-Small-Q4_K_M.gguf
sha256: 07db215cdfcb05036567017fe20e50e60cb2da28d1f9a8251cc4f18c8caa247f
uri: huggingface://arcee-ai/Virtuoso-Small-GGUF/Virtuoso-Small-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qwen2.5-7b-homeranvita-nerdmix"
urls:
- https://huggingface.co/ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix
- https://huggingface.co/QuantFactory/Qwen2.5-7B-HomerAnvita-NerdMix-GGUF
description: |
ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix is an advanced language model meticulously crafted by merging five pre-trained models using the powerful mergekit framework. This fusion leverages the Model Stock merge method to combine the creative prowess of Qandora, the instructive capabilities of Qwen-Instruct-Fusion, the sophisticated blending of HomerSlerp1, the mathematical precision of Cybertron-MGS, and the uncensored expertise of Qwen-Nerd. The resulting model excels in creative text generation, contextual understanding, technical reasoning, and dynamic conversational interactions.
overrides:
parameters:
model: Qwen2.5-7B-HomerAnvita-NerdMix.Q4_K_M.gguf
files:
- filename: Qwen2.5-7B-HomerAnvita-NerdMix.Q4_K_M.gguf
sha256: 73db2ca3ab50e8627352078988cd173e7447c5e8199a7db9e554602da1362e5f
uri: huggingface://QuantFactory/Qwen2.5-7B-HomerAnvita-NerdMix-GGUF/Qwen2.5-7B-HomerAnvita-NerdMix.Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qwen2.5-math-14b-instruct"
urls:
- https://huggingface.co/qingy2024/Qwen2.5-Math-14B-Instruct-Preview
- https://huggingface.co/QuantFactory/Qwen2.5-Math-14B-Instruct-GGUF
description: |
This Qwen 2.5 model was trained 2x faster with Unsloth and Huggingface's TRL library.
Fine-tuned it for 400 steps on garage-bAInd/Open-Platypus with a batch size of 3.
overrides:
parameters:
model: Qwen2.5-Math-14B-Instruct.Q4_K_M.gguf
files:
- filename: Qwen2.5-Math-14B-Instruct.Q4_K_M.gguf
sha256: 14e672394738a7d9f14a6cb16fd9a649b113a19a8b4934f9c18299fc4e286ab6
uri: huggingface://QuantFactory/Qwen2.5-Math-14B-Instruct-GGUF/Qwen2.5-Math-14B-Instruct.Q4_K_M.gguf
- !!merge <<: *qwen25
name: "sailor2-1b-chat"
icon: https://huggingface.co/sail/Sailor2-1B-Chat/resolve/main/sailor2_banner.jpg
urls:
- https://huggingface.co/sail/Sailor2-1B-Chat
- https://huggingface.co/bartowski/Sailor2-1B-Chat-GGUF
description: |
Sailor2 is a community-driven initiative that brings cutting-edge multilingual language models to South-East Asia (SEA). Our research highlights a strong demand for models in the 8B and 20B parameter range for production use, alongside 1B models for specialized applications, such as speculative decoding and research purposes. These models, released under the Apache 2.0 license, provide enhanced accessibility to advanced language technologies across the region.
Sailor2 builds upon the foundation of the awesome multilingual model Qwen 2.5 and is continuously pre-trained on 500B tokens to support 15 languages better with a unified model. These languages include English, Chinese, Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. By addressing the growing demand for diverse, robust, and accessible language models, Sailor2 seeks to serve the underserved in SEA areas with open, inclusive, and accessible multilingual LLMs. The Sailor2 model comes in three sizes, 1B, 8B, and 20B, which are expanded from the Qwen2.5 base models of 0.5B, 7B, and 14B, respectively.
overrides:
parameters:
model: Sailor2-1B-Chat-Q4_K_M.gguf
files:
- filename: Sailor2-1B-Chat-Q4_K_M.gguf
sha256: 782e8abed13d51a2083eadfb2f6d94c2cd77940532f612a99e6f6bec9b3501d4
uri: huggingface://bartowski/Sailor2-1B-Chat-GGUF/Sailor2-1B-Chat-Q4_K_M.gguf
- !!merge <<: *qwen25
icon: https://huggingface.co/sail/Sailor2-1B-Chat/resolve/main/sailor2_banner.jpg
name: "sailor2-8b-chat"
urls:
- https://huggingface.co/bartowski/Sailor2-8B-Chat-GGUF
description: |
Sailor2 is a community-driven initiative that brings cutting-edge multilingual language models to South-East Asia (SEA). Our research highlights a strong demand for models in the 8B and 20B parameter range for production use, alongside 1B models for specialized applications, such as speculative decoding and research purposes. These models, released under the Apache 2.0 license, provide enhanced accessibility to advanced language technologies across the region.
Sailor2 builds upon the foundation of the awesome multilingual model Qwen 2.5 and is continuously pre-trained on 500B tokens to support 15 languages better with a unified model. These languages include English, Chinese, Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. By addressing the growing demand for diverse, robust, and accessible language models, Sailor2 seeks to serve the underserved in SEA areas with open, inclusive, and accessible multilingual LLMs. The Sailor2 model comes in three sizes, 1B, 8B, and 20B, which are expanded from the Qwen2.5 base models of 0.5B, 7B, and 14B, respectively.
overrides:
parameters:
model: Sailor2-8B-Chat-Q4_K_M.gguf
files:
- filename: Sailor2-8B-Chat-Q4_K_M.gguf
sha256: 1a6aaadd6f6ef9c2290d66b348ebcbd6fdec542834cde622498fbd467d966103
uri: huggingface://bartowski/Sailor2-8B-Chat-GGUF/Sailor2-8B-Chat-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "sailor2-20b-chat"
icon: https://huggingface.co/sail/Sailor2-1B-Chat/resolve/main/sailor2_banner.jpg
urls:
- https://huggingface.co/bartowski/Sailor2-20B-Chat-GGUF
description: |
Sailor2 is a community-driven initiative that brings cutting-edge multilingual language models to South-East Asia (SEA). Our research highlights a strong demand for models in the 8B and 20B parameter range for production use, alongside 1B models for specialized applications, such as speculative decoding and research purposes. These models, released under the Apache 2.0 license, provide enhanced accessibility to advanced language technologies across the region.
Sailor2 builds upon the foundation of the awesome multilingual model Qwen 2.5 and is continuously pre-trained on 500B tokens to support 15 languages better with a unified model. These languages include English, Chinese, Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. By addressing the growing demand for diverse, robust, and accessible language models, Sailor2 seeks to serve the underserved in SEA areas with open, inclusive, and accessible multilingual LLMs. The Sailor2 model comes in three sizes, 1B, 8B, and 20B, which are expanded from the Qwen2.5 base models of 0.5B, 7B, and 14B, respectively.
overrides:
parameters:
model: Sailor2-20B-Chat-Q4_K_M.gguf
files:
- filename: Sailor2-20B-Chat-Q4_K_M.gguf
sha256: 0cf8fcd367accee19702ef15ee964bddd5035bde034afddd838f818e7655534a
uri: huggingface://bartowski/Sailor2-20B-Chat-GGUF/Sailor2-20B-Chat-Q4_K_M.gguf
- &archfunct
license: apache-2.0
tags:
@@ -3420,6 +3581,186 @@
- filename: Sparse-Llama-3.1-8B-2of4.Q4_K_M.gguf
sha256: c481e7089ffaedd5ae8c74dccc7fb45f6509640b661fa086ae979f6fefc3fdba
uri: huggingface://QuantFactory/Sparse-Llama-3.1-8B-2of4-GGUF/Sparse-Llama-3.1-8B-2of4.Q4_K_M.gguf
- !!merge <<: *llama31
name: "loki-v2.6-8b-1024k"
icon: https://cdn-uploads.huggingface.co/production/uploads/6472de046facfb01d8b1fb9d/uQPITKRS8XLTLyaiGwgh_.jpeg
urls:
- https://huggingface.co/QuantFactory/Loki-v2.6-8b-1024k-GGUF
description: |
The following models were included in the merge:
MrRobotoAI/Epic_Fiction-8b
MrRobotoAI/Unaligned-RP-Base-8b-1024k
MrRobotoAI/Loki-.Epic_Fiction.-8b
Casual-Autopsy/L3-Luna-8B
Casual-Autopsy/L3-Super-Nova-RP-8B
Casual-Autopsy/L3-Umbral-Mind-RP-v3.0-8B
Casual-Autopsy/Halu-L3-Stheno-BlackOasis-8B
Undi95/Llama-3-LewdPlay-8B
Undi95/Llama-3-LewdPlay-8B-evo
Undi95/Llama-3-Unholy-8B
ChaoticNeutrals/Hathor_Tahsin-L3-8B-v0.9
ChaoticNeutrals/Hathor_RP-v.01-L3-8B
ChaoticNeutrals/Domain-Fusion-L3-8B
ChaoticNeutrals/T-900-8B
ChaoticNeutrals/Poppy_Porpoise-1.4-L3-8B
ChaoticNeutrals/Templar_v1_8B
ChaoticNeutrals/Hathor_Respawn-L3-8B-v0.8
ChaoticNeutrals/Sekhmet_Gimmel-L3.1-8B-v0.3
zeroblu3/LewdPoppy-8B-RP
tohur/natsumura-storytelling-rp-1.0-llama-3.1-8b
jeiku/Chaos_RP_l3_8B
tannedbum/L3-Nymeria-Maid-8B
Nekochu/Luminia-8B-RP
vicgalle/Humanish-Roleplay-Llama-3.1-8B
saishf/SOVLish-Maid-L3-8B
Dogge/llama-3-8B-instruct-Bluemoon-Freedom-RP
MrRobotoAI/Epic_Fiction-8b-v4
maldv/badger-lambda-0-llama-3-8b
maldv/llama-3-fantasy-writer-8b
maldv/badger-kappa-llama-3-8b
maldv/badger-mu-llama-3-8b
maldv/badger-lambda-llama-3-8b
maldv/badger-iota-llama-3-8b
maldv/badger-writer-llama-3-8b
Magpie-Align/MagpieLM-8B-Chat-v0.1
nbeerbower/llama-3-gutenberg-8B
nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K
nbeerbower/llama-3-spicy-abliterated-stella-8B
Magpie-Align/MagpieLM-8B-SFT-v0.1
NeverSleep/Llama-3-Lumimaid-8B-v0.1
mlabonne/NeuralDaredevil-8B-abliterated
mlabonne/Daredevil-8B-abliterated
NeverSleep/Llama-3-Lumimaid-8B-v0.1-OAS
nothingiisreal/L3-8B-Instruct-Abliterated-DWP
openchat/openchat-3.6-8b-20240522
turboderp/llama3-turbcat-instruct-8b
UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3
Undi95/Llama-3-LewdPlay-8B
TIGER-Lab/MAmmoTH2-8B-Plus
OwenArli/Awanllm-Llama-3-8B-Cumulus-v1.0
refuelai/Llama-3-Refueled
SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha
NousResearch/Hermes-2-Theta-Llama-3-8B
ResplendentAI/Nymph_8B
grimjim/Llama-3-Oasis-v1-OAS-8B
flammenai/Mahou-1.3b-llama3-8B
lemon07r/Llama-3-RedMagic4-8B
grimjim/Llama-3.1-SuperNova-Lite-lorabilterated-8B
grimjim/Llama-Nephilim-Metamorphosis-v2-8B
lemon07r/Lllama-3-RedElixir-8B
grimjim/Llama-3-Perky-Pat-Instruct-8B
ChaoticNeutrals/Hathor_RP-v.01-L3-8B
grimjim/llama-3-Nephilim-v2.1-8B
ChaoticNeutrals/Hathor_Respawn-L3-8B-v0.8
migtissera/Llama-3-8B-Synthia-v3.5
Locutusque/Llama-3-Hercules-5.0-8B
WhiteRabbitNeo/Llama-3-WhiteRabbitNeo-8B-v2.0
VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct
iRyanBell/ARC1-II
HPAI-BSC/Llama3-Aloe-8B-Alpha
HaitameLaf/Llama-3-8B-StoryGenerator
failspy/Meta-Llama-3-8B-Instruct-abliterated-v3
Undi95/Llama-3-Unholy-8B
ajibawa-2023/Uncensored-Frank-Llama-3-8B
ajibawa-2023/SlimOrca-Llama-3-8B
ChaoticNeutrals/Templar_v1_8B
aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K
ChaoticNeutrals/Hathor_Tahsin-L3-8B-v0.9
Blackroot/Llama-3-Gamma-Twist
FPHam/L3-8B-Everything-COT
Blackroot/Llama-3-LongStory
ChaoticNeutrals/Sekhmet_Gimmel-L3.1-8B-v0.3
abacusai/Llama-3-Smaug-8B
Khetterman/CursedMatrix-8B-v9
ajibawa-2023/Scarlett-Llama-3-8B-v1.0
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/physics_non_masked
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/electrical_engineering
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/college_chemistry
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/philosophy_non_masked
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/college_physics
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/philosophy
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/formal_logic
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/philosophy_100
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/conceptual_physics
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/college_computer_science
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/psychology_non_masked
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/psychology
MrRobotoAI/Unaligned-RP-Base-8b-1024k + Blackroot/Llama3-RP-Lora
MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/Llama-3-LimaRP-Instruct-LoRA-8B
MrRobotoAI/Unaligned-RP-Base-8b-1024k + nothingiisreal/llama3-8B-DWP-lora
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/world_religions
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/high_school_european_history
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/electrical_engineering
MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/Llama-3-8B-Abomination-LORA
MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/Llama-3-LongStory-LORA
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/human_sexuality
MrRobotoAI/Unaligned-RP-Base-8b-1024k + surya-narayanan/sociology
MrRobotoAI/Unaligned-RP-Base-8b-1024k + ResplendentAI/Theory_of_Mind_Llama3
MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/Smarts_Llama3
MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/Llama-3-LongStory-LORA
MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/Nimue-8B
MrRobotoAI/Unaligned-RP-Base-8b-1024k + vincentyandex/lora_llama3_chunked_novel_bs128
MrRobotoAI/Unaligned-RP-Base-8b-1024k + ResplendentAI/Aura_Llama3
MrRobotoAI/Unaligned-RP-Base-8b-1024k + Azazelle/L3-Daybreak-8b-lora
MrRobotoAI/Unaligned-RP-Base-8b-1024k + ResplendentAI/Luna_Llama3
MrRobotoAI/Unaligned-RP-Base-8b-1024k + nicce/story-mixtral-8x7b-lora
MrRobotoAI/Unaligned-RP-Base-8b-1024k + Blackroot/Llama-3-LongStory-LORA
MrRobotoAI/Unaligned-RP-Base-8b-1024k + ResplendentAI/NoWarning_Llama3
MrRobotoAI/Unaligned-RP-Base-8b-1024k + ResplendentAI/BlueMoon_Llama3
overrides:
parameters:
model: Loki-v2.6-8b-1024k.Q4_K_M.gguf
files:
- filename: Loki-v2.6-8b-1024k.Q4_K_M.gguf
sha256: 9b15c1fee0a0e6d6ed97df3d1b6fc8f774e6e1bd388328599e731c62e0f19d81
uri: huggingface://QuantFactory/Loki-v2.6-8b-1024k-GGUF/Loki-v2.6-8b-1024k.Q4_K_M.gguf
- !!merge <<: *llama31
name: "impish_mind_8b"
icon: https://huggingface.co/SicariusSicariiStuff/Impish_Mind_8B/resolve/main/Images/Impish_Mind.png
urls:
- https://huggingface.co/SicariusSicariiStuff/Impish_Mind_8B
- https://huggingface.co/bartowski/Impish_Mind_8B-GGUF
description: |
This model was trained with new data and a new approach (compared to my other models). While it may be a bit more censored, it is expected to be significantly smarter. The data used is quite unique, and is also featuring long and complex markdown datasets.
Regarding censorship: Whether uncensoring or enforcing strict censorship, the model tends to lose some of its intelligence. The use of toxic data was kept to a minimum with this model.
Consequently, the model is likely to refuse some requests, this is easly avoidable with a basic system prompt, or assistant impersonation ("Sure thing!..."). Unlike many RP models, this one is designed to excel at general assistant tasks as well.
overrides:
parameters:
model: Impish_Mind_8B-Q4_K_M.gguf
files:
- filename: Impish_Mind_8B-Q4_K_M.gguf
sha256: 918f82bcb893c75fa2e846156df7bd3ce359464b960e32ae9171035ee14e7c51
uri: huggingface://bartowski/Impish_Mind_8B-GGUF/Impish_Mind_8B-Q4_K_M.gguf
- !!merge <<: *llama31
name: "tulu-3.1-8b-supernova-smart"
urls:
- https://huggingface.co/bunnycore/Tulu-3.1-8B-SuperNova-Smart
- https://huggingface.co/QuantFactory/Tulu-3.1-8B-SuperNova-Smart-GGUF
description: |
This model was merged using the passthrough merge method using bunnycore/Tulu-3.1-8B-SuperNova + bunnycore/Llama-3.1-8b-smart-lora as a base.
overrides:
parameters:
model: Tulu-3.1-8B-SuperNova-Smart.Q4_K_M.gguf
files:
- filename: Tulu-3.1-8B-SuperNova-Smart.Q4_K_M.gguf
sha256: 4b8ba9e64f0667199eee2dcc769f1a90aa9c7730165d42f440fdf107c7585c63
uri: huggingface://QuantFactory/Tulu-3.1-8B-SuperNova-Smart-GGUF/Tulu-3.1-8B-SuperNova-Smart.Q4_K_M.gguf
- !!merge <<: *llama31
name: "b-nimita-l3-8b-v0.02"
urls:
- https://huggingface.co/Arkana08/B-NIMITA-L3-8B-v0.02
- https://huggingface.co/QuantFactory/B-NIMITA-L3-8B-v0.02-GGUF
description: |
B-NIMITA is an AI model designed to bring role-playing scenarios to life with emotional depth and rich storytelling. At its core is NIHAPPY, providing a solid narrative foundation and contextual consistency. This is enhanced by Mythorica, which adds vivid emotional arcs and expressive dialogue, and V-Blackroot, ensuring character consistency and subtle adaptability. This combination allows B-NIMITA to deliver dynamic, engaging interactions that feel natural and immersive.
overrides:
parameters:
model: B-NIMITA-L3-8B-v0.02.Q4_K_M.gguf
files:
- filename: B-NIMITA-L3-8B-v0.02.Q4_K_M.gguf
sha256: 625a54848dcd3f23bc06b639a7dfecae14142b5d177dd45acfe7724816bab4cd
uri: huggingface://QuantFactory/B-NIMITA-L3-8B-v0.02-GGUF/B-NIMITA-L3-8B-v0.02.Q4_K_M.gguf
- &deepseek
## Deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
@@ -4230,6 +4571,30 @@
- filename: Chatty-Harry_V3.0.Q4_K_M.gguf
sha256: 54b63bb74498576ca77b801ed096657a93cc2f6b71d707c3605fdb394bd3e622
uri: huggingface://QuantFactory/Chatty-Harry_V3.0-GGUF/Chatty-Harry_V3.0.Q4_K_M.gguf
- !!merge <<: *mistral03
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "mn-chunky-lotus-12b"
icon: https://huggingface.co/FallenMerick/MN-Chunky-Lotus-12B/resolve/main/chunky-lotus.jpg
urls:
- https://huggingface.co/QuantFactory/MN-Chunky-Lotus-12B-GGUF
description: |
I had originally planned to use this model for future/further merges, but decided to go ahead and release it since it scored rather high on my local EQ Bench testing (79.58 w/ 100% parsed @ 8-bit).
Bear in mind that most models tend to score a bit higher on my own local tests as compared to their posted scores. Still, its the highest score I've personally seen from all the models I've tested.
Its a decent model, with great emotional intelligence and acceptable adherence to various character personalities. It does a good job at roleplaying despite being a bit bland at times.
Overall, I like the way it writes, but it has a few formatting issues that show up from time to time, and it has an uncommon tendency to paste walls of character feelings/intentions at the end of some outputs without any prompting. This is something I hope to correct with future iterations.
This is a merge of pre-trained language models created using mergekit.
The following models were included in the merge:
Epiculous/Violet_Twilight-v0.2
nbeerbower/mistral-nemo-gutenberg-12B-v4
flammenai/Mahou-1.5-mistral-nemo-12B
overrides:
parameters:
model: MN-Chunky-Lotus-12B.Q4_K_M.gguf
files:
- filename: MN-Chunky-Lotus-12B.Q4_K_M.gguf
sha256: 363defe0a769fdb715dab75517966a0a80bcdd981a610d4c759099b6c8ff143a
uri: huggingface://QuantFactory/MN-Chunky-Lotus-12B-GGUF/MN-Chunky-Lotus-12B.Q4_K_M.gguf
- &mudler
### START mudler's LocalAI specific-models
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
@@ -7891,6 +8256,43 @@
- filename: Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf
sha256: 5dd81b8b809667d10036499affdd1461cf95af50b405cbc9f800b421a4b60e98
uri: huggingface://DavidAU/Meta-Llama-3-Instruct-8.9B-BRAINSTORM-5x-FORM-11-GGUF/Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf
- !!merge <<: *llama3
name: "rp-naughty-v1.0c-8b"
urls:
- https://huggingface.co/QuantFactory/RP-Naughty-v1.0c-8b-GGUF
description: |
This model was merged using the Model Stock merge method using aifeifei798/llama3-8B-DarkIdol-2.2-Uncensored-1048K as a base.
The following models were included in the merge:
underwoods/adventure-8b
Khetterman/Multilingual-SaigaSuzume-8B
underwoods/writer-8b
Khetterman/Kosmos-8B-v1
Khetterman/CursedMatrix-8B-v9
overrides:
parameters:
model: RP-Naughty-v1.0c-8b.Q4_K_M.gguf
files:
- filename: RP-Naughty-v1.0c-8b.Q4_K_M.gguf
sha256: c344564d26d0c3d244d31cfeb103666eab37f9dee6678a2dbaf5bfcf4109d789
uri: huggingface://QuantFactory/RP-Naughty-v1.0c-8b-GGUF/RP-Naughty-v1.0c-8b.Q4_K_M.gguf
- !!merge <<: *llama3
name: "bio-medical-llama-3-8b"
icon: https://cdn-uploads.huggingface.co/production/uploads/653f5b93cd52f288490edc83/zPMUugzfOiwTiRw88jm7T.jpeg
urls:
- https://huggingface.co/ContactDoctor/Bio-Medical-Llama-3-8B
- https://huggingface.co/QuantFactory/Bio-Medical-Llama-3-8B-GGUF
description: |
Bio-Medical-Llama-3-8B model is a specialized large language model designed for biomedical applications. It is finetuned from the meta-llama/Meta-Llama-3-8B-Instruct model using a custom dataset containing over 500,000 diverse entries. These entries include a mix of synthetic and manually curated data, ensuring high quality and broad coverage of biomedical topics.
The model is trained to understand and generate text related to various biomedical fields, making it a valuable tool for researchers, clinicians, and other professionals in the biomedical domain.
overrides:
parameters:
model: Bio-Medical-Llama-3-8B.Q4_K_M.gguf
files:
- filename: Bio-Medical-Llama-3-8B.Q4_K_M.gguf
sha256: 672939e0487d02c55734132c25a59f26e4deaac7cd49445a7028f2291139edcc
uri: huggingface://QuantFactory/Bio-Medical-Llama-3-8B-GGUF/Bio-Medical-Llama-3-8B.Q4_K_M.gguf
- &command-R
### START Command-r
url: "github:mudler/LocalAI/gallery/command-r.yaml@master"

5
go.mod
View File

@@ -76,6 +76,7 @@ require (
cloud.google.com/go/auth v0.4.1 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
cloud.google.com/go/compute/metadata v0.3.0 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/envoyproxy/protoc-gen-validate v1.0.4 // indirect
github.com/fasthttp/websocket v1.5.3 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
@@ -84,8 +85,12 @@ require (
github.com/google/s2a-go v0.1.7 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
github.com/googleapis/gax-go/v2 v2.12.4 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/nikolalohinski/gonja/v2 v2.3.2 // indirect
github.com/pion/datachannel v1.5.8 // indirect
github.com/pion/dtls/v2 v2.2.12 // indirect
github.com/pion/ice/v2 v2.3.34 // indirect

12
go.sum
View File

@@ -140,6 +140,8 @@ github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
github.com/elastic/gosigar v0.14.3 h1:xwkKwPia+hSfg9GqrCUKYdId102m9qTJIIr7egmK/uo=
github.com/elastic/gosigar v0.14.3/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
@@ -268,6 +270,7 @@ github.com/google/go-containerregistry v0.19.2 h1:TannFKE1QSajsP6hPWb5oJNgKe1IKj
github.com/google/go-containerregistry v0.19.2/go.mod h1:YCMFNQeeXeLF+dnhhWkqDItx/JSkH01j1Kis4PsjzFI=
github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ=
github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF8=
github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
@@ -353,6 +356,8 @@ github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwA
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
@@ -474,8 +479,12 @@ github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5
github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo=
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
@@ -519,6 +528,9 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJEU3ofeGjhHklVoIGuVj85JJwZ6kWPaJwCIxgnFmo=
github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM=
github.com/nikolalohinski/gonja v1.5.3 h1:GsA+EEaZDZPGJ8JtpeGN78jidhOlxeJROpqMT9fTj9c=
github.com/nikolalohinski/gonja/v2 v2.3.2 h1:UgLFfqi7L9XfX0PEcE4eUpvGojVQL5KhBfJJaBp7ZxY=
github.com/nikolalohinski/gonja/v2 v2.3.2/go.mod h1:1Wcc/5huTu6y36e0sOFR1XQoFlylw3c3H3L5WOz0RDg=
github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY=

View File

@@ -37,7 +37,7 @@ type Backend interface {
Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error)
Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error)
LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error
PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error
GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)

View File

@@ -136,7 +136,7 @@ func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grp
return client.LoadModel(ctx, in, opts...)
}
func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error {
func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error {
if !c.parallel {
c.opMutex.Lock()
defer c.opMutex.Unlock()
@@ -158,7 +158,7 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun
}
for {
reply, err := stream.Recv()
feature, err := stream.Recv()
if err == io.EOF {
break
}
@@ -167,7 +167,7 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun
return err
}
f(reply)
f(feature.GetMessage())
}
return nil

View File

@@ -35,7 +35,7 @@ func (e *embedBackend) LoadModel(ctx context.Context, in *pb.ModelOptions, opts
return e.s.LoadModel(ctx, in)
}
func (e *embedBackend) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error {
func (e *embedBackend) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error {
bs := &embedBackendServerStream{
ctx: ctx,
fn: f,
@@ -97,11 +97,11 @@ func (e *embedBackend) GetTokenMetrics(ctx context.Context, in *pb.MetricsReques
type embedBackendServerStream struct {
ctx context.Context
fn func(reply *pb.Reply)
fn func(s []byte)
}
func (e *embedBackendServerStream) Send(reply *pb.Reply) error {
e.fn(reply)
e.fn(reply.GetMessage())
return nil
}

View File

@@ -9,8 +9,6 @@ import (
"sync"
"time"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
@@ -23,7 +21,6 @@ type ModelLoader struct {
ModelPath string
mu sync.Mutex
models map[string]*Model
templates *templates.TemplateCache
wd *WatchDog
}
@@ -31,7 +28,6 @@ func NewModelLoader(modelPath string) *ModelLoader {
nml := &ModelLoader{
ModelPath: modelPath,
models: make(map[string]*Model),
templates: templates.NewTemplateCache(modelPath),
}
return nml

View File

@@ -1,52 +0,0 @@
package model
import (
"fmt"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/templates"
)
// Rather than pass an interface{} to the prompt template:
// These are the definitions of all possible variables LocalAI will currently populate for use in a prompt template file
// Please note: Not all of these are populated on every endpoint - your template should either be tested for each endpoint you map it to, or tolerant of zero values.
type PromptTemplateData struct {
SystemPrompt string
SuppressSystemPrompt bool // used by chat specifically to indicate that SystemPrompt above should be _ignored_
Input string
Instruction string
Functions []functions.Function
MessageIndex int
}
type ChatMessageTemplateData struct {
SystemPrompt string
Role string
RoleName string
FunctionName string
Content string
MessageIndex int
Function bool
FunctionCall interface{}
LastMessage bool
}
const (
ChatPromptTemplate templates.TemplateType = iota
ChatMessageTemplate
CompletionPromptTemplate
EditPromptTemplate
FunctionsPromptTemplate
)
func (ml *ModelLoader) EvaluateTemplateForPrompt(templateType templates.TemplateType, templateName string, in PromptTemplateData) (string, error) {
// TODO: should this check be improved?
if templateType == ChatMessageTemplate {
return "", fmt.Errorf("invalid templateType: ChatMessage")
}
return ml.templates.EvaluateTemplate(templateType, templateName, in)
}
func (ml *ModelLoader) EvaluateTemplateForChatMessage(templateName string, messageData ChatMessageTemplateData) (string, error) {
return ml.templates.EvaluateTemplate(ChatMessageTemplate, templateName, messageData)
}

View File

@@ -1,197 +0,0 @@
package model_test
import (
. "github.com/mudler/LocalAI/pkg/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>`
const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>`
var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
"user": {
"template": llama3,
"expected": "<|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
"data": ChatMessageTemplateData{
SystemPrompt: "",
Role: "user",
RoleName: "user",
Content: "A long time ago in a galaxy far, far away...",
FunctionCall: nil,
FunctionName: "",
LastMessage: false,
Function: false,
MessageIndex: 0,
},
},
"assistant": {
"template": llama3,
"expected": "<|start_header_id|>assistant<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
"data": ChatMessageTemplateData{
SystemPrompt: "",
Role: "assistant",
RoleName: "assistant",
Content: "A long time ago in a galaxy far, far away...",
FunctionCall: nil,
FunctionName: "",
LastMessage: false,
Function: false,
MessageIndex: 0,
},
},
"function_call": {
"template": llama3,
"expected": "<|start_header_id|>assistant<|end_header_id|>\n\nFunction call:\n{\"function\":\"test\"}<|eot_id|>",
"data": ChatMessageTemplateData{
SystemPrompt: "",
Role: "assistant",
RoleName: "assistant",
Content: "",
FunctionCall: map[string]string{"function": "test"},
FunctionName: "",
LastMessage: false,
Function: false,
MessageIndex: 0,
},
},
"function_response": {
"template": llama3,
"expected": "<|start_header_id|>tool<|end_header_id|>\n\nFunction response:\nResponse from tool<|eot_id|>",
"data": ChatMessageTemplateData{
SystemPrompt: "",
Role: "tool",
RoleName: "tool",
Content: "Response from tool",
FunctionCall: nil,
FunctionName: "",
LastMessage: false,
Function: false,
MessageIndex: 0,
},
},
}
var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
"user": {
"template": chatML,
"expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...<|im_end|>",
"data": ChatMessageTemplateData{
SystemPrompt: "",
Role: "user",
RoleName: "user",
Content: "A long time ago in a galaxy far, far away...",
FunctionCall: nil,
FunctionName: "",
LastMessage: false,
Function: false,
MessageIndex: 0,
},
},
"assistant": {
"template": chatML,
"expected": "<|im_start|>assistant\nA long time ago in a galaxy far, far away...<|im_end|>",
"data": ChatMessageTemplateData{
SystemPrompt: "",
Role: "assistant",
RoleName: "assistant",
Content: "A long time ago in a galaxy far, far away...",
FunctionCall: nil,
FunctionName: "",
LastMessage: false,
Function: false,
MessageIndex: 0,
},
},
"function_call": {
"template": chatML,
"expected": "<|im_start|>assistant\n<tool_call>\n{\"function\":\"test\"}\n</tool_call><|im_end|>",
"data": ChatMessageTemplateData{
SystemPrompt: "",
Role: "assistant",
RoleName: "assistant",
Content: "",
FunctionCall: map[string]string{"function": "test"},
FunctionName: "",
LastMessage: false,
Function: false,
MessageIndex: 0,
},
},
"function_response": {
"template": chatML,
"expected": "<|im_start|>tool\n<tool_response>\nResponse from tool\n</tool_response><|im_end|>",
"data": ChatMessageTemplateData{
SystemPrompt: "",
Role: "tool",
RoleName: "tool",
Content: "Response from tool",
FunctionCall: nil,
FunctionName: "",
LastMessage: false,
Function: false,
MessageIndex: 0,
},
},
}
var _ = Describe("Templates", func() {
Context("chat message ChatML", func() {
var modelLoader *ModelLoader
BeforeEach(func() {
modelLoader = NewModelLoader("")
})
for key := range chatMLTestMatch {
foo := chatMLTestMatch[key]
It("renders correctly `"+key+"`", func() {
templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(ChatMessageTemplateData))
Expect(err).ToNot(HaveOccurred())
Expect(templated).To(Equal(foo["expected"]), templated)
})
}
})
Context("chat message llama3", func() {
var modelLoader *ModelLoader
BeforeEach(func() {
modelLoader = NewModelLoader("")
})
for key := range llama3TestMatch {
foo := llama3TestMatch[key]
It("renders correctly `"+key+"`", func() {
templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(ChatMessageTemplateData))
Expect(err).ToNot(HaveOccurred())
Expect(templated).To(Equal(foo["expected"]), templated)
})
}
})
})

View File

@@ -11,59 +11,41 @@ import (
"github.com/mudler/LocalAI/pkg/utils"
"github.com/Masterminds/sprig/v3"
"github.com/nikolalohinski/gonja/v2"
"github.com/nikolalohinski/gonja/v2/exec"
)
// Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go?
// Technically, order doesn't _really_ matter, but the count must stay in sync, see tests/integration/reflect_test.go
type TemplateType int
type TemplateCache struct {
mu sync.Mutex
templatesPath string
templates map[TemplateType]map[string]*template.Template
type templateCache struct {
mu sync.Mutex
templatesPath string
templates map[TemplateType]map[string]*template.Template
jinjaTemplates map[TemplateType]map[string]*exec.Template
}
func NewTemplateCache(templatesPath string) *TemplateCache {
tc := &TemplateCache{
templatesPath: templatesPath,
templates: make(map[TemplateType]map[string]*template.Template),
func newTemplateCache(templatesPath string) *templateCache {
tc := &templateCache{
templatesPath: templatesPath,
templates: make(map[TemplateType]map[string]*template.Template),
jinjaTemplates: make(map[TemplateType]map[string]*exec.Template),
}
return tc
}
func (tc *TemplateCache) initializeTemplateMapKey(tt TemplateType) {
func (tc *templateCache) initializeTemplateMapKey(tt TemplateType) {
if _, ok := tc.templates[tt]; !ok {
tc.templates[tt] = make(map[string]*template.Template)
}
}
func (tc *TemplateCache) EvaluateTemplate(templateType TemplateType, templateName string, in interface{}) (string, error) {
tc.mu.Lock()
defer tc.mu.Unlock()
tc.initializeTemplateMapKey(templateType)
m, ok := tc.templates[templateType][templateName]
if !ok {
// return "", fmt.Errorf("template not loaded: %s", templateName)
loadErr := tc.loadTemplateIfExists(templateType, templateName)
if loadErr != nil {
return "", loadErr
}
m = tc.templates[templateType][templateName] // ok is not important since we check m on the next line, and wealready checked
}
if m == nil {
return "", fmt.Errorf("failed loading a template for %s", templateName)
}
var buf bytes.Buffer
if err := m.Execute(&buf, in); err != nil {
return "", err
}
return buf.String(), nil
func (tc *templateCache) existsInModelPath(s string) bool {
return utils.ExistsInPath(tc.templatesPath, s)
}
func (tc *TemplateCache) loadTemplateIfExists(templateType TemplateType, templateName string) error {
func (tc *templateCache) loadTemplateIfExists(templateType TemplateType, templateName string) error {
// Check if the template was already loaded
if _, ok := tc.templates[templateType][templateName]; ok {
@@ -82,6 +64,51 @@ func (tc *TemplateCache) loadTemplateIfExists(templateType TemplateType, templat
return fmt.Errorf("template file outside path: %s", file)
}
// can either be a file in the system or a string with the template
if tc.existsInModelPath(modelTemplateFile) {
d, err := os.ReadFile(file)
if err != nil {
return err
}
dat = string(d)
} else {
dat = templateName
}
// Parse the template
tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat)
if err != nil {
return err
}
tc.templates[templateType][templateName] = tmpl
return nil
}
func (tc *templateCache) initializeJinjaTemplateMapKey(tt TemplateType) {
if _, ok := tc.jinjaTemplates[tt]; !ok {
tc.jinjaTemplates[tt] = make(map[string]*exec.Template)
}
}
func (tc *templateCache) loadJinjaTemplateIfExists(templateType TemplateType, templateName string) error {
// Check if the template was already loaded
if _, ok := tc.jinjaTemplates[templateType][templateName]; ok {
return nil
}
// Check if the model path exists
// skip any error here - we run anyway if a template does not exist
modelTemplateFile := fmt.Sprintf("%s.tmpl", templateName)
dat := ""
file := filepath.Join(tc.templatesPath, modelTemplateFile)
// Security check
if err := utils.VerifyPath(modelTemplateFile, tc.templatesPath); err != nil {
return fmt.Errorf("template file outside path: %s", file)
}
// can either be a file in the system or a string with the template
if utils.ExistsInPath(tc.templatesPath, modelTemplateFile) {
d, err := os.ReadFile(file)
@@ -93,12 +120,65 @@ func (tc *TemplateCache) loadTemplateIfExists(templateType TemplateType, templat
dat = templateName
}
// Parse the template
tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat)
tmpl, err := gonja.FromString(dat)
if err != nil {
return err
}
tc.templates[templateType][templateName] = tmpl
tc.jinjaTemplates[templateType][templateName] = tmpl
return nil
}
func (tc *templateCache) evaluateJinjaTemplate(templateType TemplateType, templateNameOrContent string, in map[string]interface{}) (string, error) {
tc.mu.Lock()
defer tc.mu.Unlock()
tc.initializeJinjaTemplateMapKey(templateType)
m, ok := tc.jinjaTemplates[templateType][templateNameOrContent]
if !ok {
// return "", fmt.Errorf("template not loaded: %s", templateName)
loadErr := tc.loadJinjaTemplateIfExists(templateType, templateNameOrContent)
if loadErr != nil {
return "", loadErr
}
m = tc.jinjaTemplates[templateType][templateNameOrContent] // ok is not important since we check m on the next line, and wealready checked
}
if m == nil {
return "", fmt.Errorf("failed loading a template for %s", templateNameOrContent)
}
var buf bytes.Buffer
data := exec.NewContext(in)
if err := m.Execute(&buf, data); err != nil {
return "", err
}
return buf.String(), nil
}
func (tc *templateCache) evaluateTemplate(templateType TemplateType, templateNameOrContent string, in interface{}) (string, error) {
tc.mu.Lock()
defer tc.mu.Unlock()
tc.initializeTemplateMapKey(templateType)
m, ok := tc.templates[templateType][templateNameOrContent]
if !ok {
// return "", fmt.Errorf("template not loaded: %s", templateName)
loadErr := tc.loadTemplateIfExists(templateType, templateNameOrContent)
if loadErr != nil {
return "", loadErr
}
m = tc.templates[templateType][templateNameOrContent] // ok is not important since we check m on the next line, and wealready checked
}
if m == nil {
return "", fmt.Errorf("failed loading a template for %s", templateNameOrContent)
}
var buf bytes.Buffer
if err := m.Execute(&buf, in); err != nil {
return "", err
}
return buf.String(), nil
}

View File

@@ -1,73 +0,0 @@
package templates_test
import (
"os"
"path/filepath"
"github.com/mudler/LocalAI/pkg/templates" // Update with your module path
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("TemplateCache", func() {
var (
templateCache *templates.TemplateCache
tempDir string
)
BeforeEach(func() {
var err error
tempDir, err = os.MkdirTemp("", "templates")
Expect(err).NotTo(HaveOccurred())
// Writing example template files
err = os.WriteFile(filepath.Join(tempDir, "example.tmpl"), []byte("Hello, {{.Name}}!"), 0600)
Expect(err).NotTo(HaveOccurred())
err = os.WriteFile(filepath.Join(tempDir, "empty.tmpl"), []byte(""), 0600)
Expect(err).NotTo(HaveOccurred())
templateCache = templates.NewTemplateCache(tempDir)
})
AfterEach(func() {
os.RemoveAll(tempDir) // Clean up
})
Describe("EvaluateTemplate", func() {
Context("when template is loaded successfully", func() {
It("should evaluate the template correctly", func() {
result, err := templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"})
Expect(err).NotTo(HaveOccurred())
Expect(result).To(Equal("Hello, Gopher!"))
})
})
Context("when template isn't a file", func() {
It("should parse from string", func() {
result, err := templateCache.EvaluateTemplate(1, "{{.Name}}", map[string]string{"Name": "Gopher"})
Expect(err).ToNot(HaveOccurred())
Expect(result).To(Equal("Gopher"))
})
})
Context("when template is empty", func() {
It("should return an empty string", func() {
result, err := templateCache.EvaluateTemplate(1, "empty", nil)
Expect(err).NotTo(HaveOccurred())
Expect(result).To(Equal(""))
})
})
})
Describe("concurrency", func() {
It("should handle multiple concurrent accesses", func(done Done) {
go func() {
_, _ = templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"})
}()
go func() {
_, _ = templateCache.EvaluateTemplate(1, "example", map[string]string{"Name": "Gopher"})
}()
close(done)
}, 0.1) // timeout in seconds
})
})

295
pkg/templates/evaluator.go Normal file
View File

@@ -0,0 +1,295 @@
package templates
import (
"encoding/json"
"fmt"
"strings"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/rs/zerolog/log"
)
// Rather than pass an interface{} to the prompt template:
// These are the definitions of all possible variables LocalAI will currently populate for use in a prompt template file
// Please note: Not all of these are populated on every endpoint - your template should either be tested for each endpoint you map it to, or tolerant of zero values.
type PromptTemplateData struct {
SystemPrompt string
SuppressSystemPrompt bool // used by chat specifically to indicate that SystemPrompt above should be _ignored_
Input string
Instruction string
Functions []functions.Function
MessageIndex int
}
type ChatMessageTemplateData struct {
SystemPrompt string
Role string
RoleName string
FunctionName string
Content string
MessageIndex int
Function bool
FunctionCall interface{}
LastMessage bool
}
const (
ChatPromptTemplate TemplateType = iota
ChatMessageTemplate
CompletionPromptTemplate
EditPromptTemplate
FunctionsPromptTemplate
)
type Evaluator struct {
cache *templateCache
}
func NewEvaluator(modelPath string) *Evaluator {
return &Evaluator{
cache: newTemplateCache(modelPath),
}
}
func (e *Evaluator) EvaluateTemplateForPrompt(templateType TemplateType, config config.BackendConfig, in PromptTemplateData) (string, error) {
template := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if e.cache.existsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
template = config.Model
}
switch templateType {
case CompletionPromptTemplate:
if config.TemplateConfig.Completion != "" {
template = config.TemplateConfig.Completion
}
case EditPromptTemplate:
if config.TemplateConfig.Edit != "" {
template = config.TemplateConfig.Edit
}
case ChatPromptTemplate:
if config.TemplateConfig.Chat != "" {
template = config.TemplateConfig.Chat
}
case FunctionsPromptTemplate:
if config.TemplateConfig.Functions != "" {
template = config.TemplateConfig.Functions
}
}
if template == "" {
return in.Input, nil
}
if config.TemplateConfig.JinjaTemplate {
return e.evaluateJinjaTemplateForPrompt(templateType, template, in)
}
return e.cache.evaluateTemplate(templateType, template, in)
}
func (e *Evaluator) evaluateTemplateForChatMessage(templateName string, messageData ChatMessageTemplateData) (string, error) {
return e.cache.evaluateTemplate(ChatMessageTemplate, templateName, messageData)
}
func (e *Evaluator) templateJinjaChat(templateName string, messageData []ChatMessageTemplateData, funcs []functions.Function) (string, error) {
conversation := make(map[string]interface{})
messages := make([]map[string]interface{}, len(messageData))
// convert from ChatMessageTemplateData to what the jinja template expects
for _, message := range messageData {
// TODO: this seems to cover minimum text templates. Can be expanded to cover more complex interactions
var data []byte
data, _ = json.Marshal(message.FunctionCall)
messages = append(messages, map[string]interface{}{
"role": message.RoleName,
"content": message.Content,
"tool_call": string(data),
})
}
conversation["messages"] = messages
// if tools are detected, add these
if len(funcs) > 0 {
conversation["tools"] = funcs
}
return e.cache.evaluateJinjaTemplate(ChatMessageTemplate, templateName, conversation)
}
func (e *Evaluator) evaluateJinjaTemplateForPrompt(templateType TemplateType, templateName string, in PromptTemplateData) (string, error) {
conversation := make(map[string]interface{})
conversation["system_prompt"] = in.SystemPrompt
conversation["content"] = in.Input
return e.cache.evaluateJinjaTemplate(templateType, templateName, conversation)
}
func (e *Evaluator) TemplateMessages(messages []schema.Message, config *config.BackendConfig, funcs []functions.Function, shouldUseFn bool) string {
if config.TemplateConfig.JinjaTemplate {
var messageData []ChatMessageTemplateData
for messageIndex, i := range messages {
fcall := i.FunctionCall
if len(i.ToolCalls) > 0 {
fcall = i.ToolCalls
}
messageData = append(messageData, ChatMessageTemplateData{
SystemPrompt: config.SystemPrompt,
Role: config.Roles[i.Role],
RoleName: i.Role,
Content: i.StringContent,
FunctionCall: fcall,
FunctionName: i.Name,
LastMessage: messageIndex == (len(messages) - 1),
Function: config.Grammar != "" && (messageIndex == (len(messages) - 1)),
MessageIndex: messageIndex,
})
}
templatedInput, err := e.templateJinjaChat(config.TemplateConfig.ChatMessage, messageData, funcs)
if err == nil {
return templatedInput
}
}
var predInput string
suppressConfigSystemPrompt := false
mess := []string{}
for messageIndex, i := range messages {
var content string
role := i.Role
// if function call, we might want to customize the role so we can display better that the "assistant called a json action"
// if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
roleFn := "assistant_function_call"
r := config.Roles[roleFn]
if r != "" {
role = roleFn
}
}
r := config.Roles[role]
contentExists := i.Content != nil && i.StringContent != ""
fcall := i.FunctionCall
if len(i.ToolCalls) > 0 {
fcall = i.ToolCalls
}
// First attempt to populate content via a chat message specific template
if config.TemplateConfig.ChatMessage != "" {
chatMessageData := ChatMessageTemplateData{
SystemPrompt: config.SystemPrompt,
Role: r,
RoleName: role,
Content: i.StringContent,
FunctionCall: fcall,
FunctionName: i.Name,
LastMessage: messageIndex == (len(messages) - 1),
Function: config.Grammar != "" && (messageIndex == (len(messages) - 1)),
MessageIndex: messageIndex,
}
templatedChatMessage, err := e.evaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
if err != nil {
log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping")
} else {
if templatedChatMessage == "" {
log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
}
log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
content = templatedChatMessage
}
}
marshalAnyRole := func(f any) {
j, err := json.Marshal(f)
if err == nil {
if contentExists {
content += "\n" + fmt.Sprint(r, " ", string(j))
} else {
content = fmt.Sprint(r, " ", string(j))
}
}
}
marshalAny := func(f any) {
j, err := json.Marshal(f)
if err == nil {
if contentExists {
content += "\n" + string(j)
} else {
content = string(j)
}
}
}
// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
if content == "" {
if r != "" {
if contentExists {
content = fmt.Sprint(r, i.StringContent)
}
if i.FunctionCall != nil {
marshalAnyRole(i.FunctionCall)
}
if i.ToolCalls != nil {
marshalAnyRole(i.ToolCalls)
}
} else {
if contentExists {
content = fmt.Sprint(i.StringContent)
}
if i.FunctionCall != nil {
marshalAny(i.FunctionCall)
}
if i.ToolCalls != nil {
marshalAny(i.ToolCalls)
}
}
// Special Handling: System. We care if it was printed at all, not the r branch, so check seperately
if contentExists && role == "system" {
suppressConfigSystemPrompt = true
}
}
mess = append(mess, content)
}
joinCharacter := "\n"
if config.TemplateConfig.JoinChatMessagesByCharacter != nil {
joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter
}
predInput = strings.Join(mess, joinCharacter)
log.Debug().Msgf("Prompt (before templating): %s", predInput)
promptTemplate := ChatPromptTemplate
if config.TemplateConfig.Functions != "" && shouldUseFn {
promptTemplate = FunctionsPromptTemplate
}
templatedInput, err := e.EvaluateTemplateForPrompt(promptTemplate, *config, PromptTemplateData{
SystemPrompt: config.SystemPrompt,
SuppressSystemPrompt: suppressConfigSystemPrompt,
Input: predInput,
Functions: funcs,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
} else {
log.Debug().Msgf("Template failed loading: %s", err.Error())
}
return predInput
}

View File

@@ -0,0 +1,253 @@
package templates_test
import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
. "github.com/mudler/LocalAI/pkg/templates"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
const toolCallJinja = `{{ '<|begin_of_text|>' }}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>
' + system_message + '<|eot_id|>' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>
' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>
' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' }}{% endif %}{% endfor %}`
const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>`
const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>`
var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
"user": {
"expected": "<|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
"config": &config.BackendConfig{
TemplateConfig: config.TemplateConfig{
ChatMessage: llama3,
},
},
"functions": []functions.Function{},
"shouldUseFn": false,
"messages": []schema.Message{
{
Role: "user",
StringContent: "A long time ago in a galaxy far, far away...",
},
},
},
"assistant": {
"expected": "<|start_header_id|>assistant<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
"config": &config.BackendConfig{
TemplateConfig: config.TemplateConfig{
ChatMessage: llama3,
},
},
"functions": []functions.Function{},
"messages": []schema.Message{
{
Role: "assistant",
StringContent: "A long time ago in a galaxy far, far away...",
},
},
"shouldUseFn": false,
},
"function_call": {
"expected": "<|start_header_id|>assistant<|end_header_id|>\n\nFunction call:\n{\"function\":\"test\"}<|eot_id|>",
"config": &config.BackendConfig{
TemplateConfig: config.TemplateConfig{
ChatMessage: llama3,
},
},
"functions": []functions.Function{},
"messages": []schema.Message{
{
Role: "assistant",
FunctionCall: map[string]string{"function": "test"},
},
},
"shouldUseFn": false,
},
"function_response": {
"expected": "<|start_header_id|>tool<|end_header_id|>\n\nFunction response:\nResponse from tool<|eot_id|>",
"config": &config.BackendConfig{
TemplateConfig: config.TemplateConfig{
ChatMessage: llama3,
},
},
"functions": []functions.Function{},
"messages": []schema.Message{
{
Role: "tool",
StringContent: "Response from tool",
},
},
"shouldUseFn": false,
},
}
var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
"user": {
"expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...<|im_end|>",
"config": &config.BackendConfig{
TemplateConfig: config.TemplateConfig{
ChatMessage: chatML,
},
},
"functions": []functions.Function{},
"shouldUseFn": false,
"messages": []schema.Message{
{
Role: "user",
StringContent: "A long time ago in a galaxy far, far away...",
},
},
},
"assistant": {
"expected": "<|im_start|>assistant\nA long time ago in a galaxy far, far away...<|im_end|>",
"config": &config.BackendConfig{
TemplateConfig: config.TemplateConfig{
ChatMessage: chatML,
},
},
"functions": []functions.Function{},
"messages": []schema.Message{
{
Role: "assistant",
StringContent: "A long time ago in a galaxy far, far away...",
},
},
"shouldUseFn": false,
},
"function_call": {
"expected": "<|im_start|>assistant\n<tool_call>\n{\"function\":\"test\"}\n</tool_call><|im_end|>",
"config": &config.BackendConfig{
TemplateConfig: config.TemplateConfig{
ChatMessage: chatML,
},
},
"functions": []functions.Function{
{
Name: "test",
Description: "test",
Parameters: nil,
},
},
"shouldUseFn": true,
"messages": []schema.Message{
{
Role: "assistant",
FunctionCall: map[string]string{"function": "test"},
},
},
},
"function_response": {
"expected": "<|im_start|>tool\n<tool_response>\nResponse from tool\n</tool_response><|im_end|>",
"config": &config.BackendConfig{
TemplateConfig: config.TemplateConfig{
ChatMessage: chatML,
},
},
"functions": []functions.Function{},
"shouldUseFn": false,
"messages": []schema.Message{
{
Role: "tool",
StringContent: "Response from tool",
},
},
},
}
var jinjaTest map[string]map[string]interface{} = map[string]map[string]interface{}{
"user": {
"expected": "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"config": &config.BackendConfig{
TemplateConfig: config.TemplateConfig{
ChatMessage: toolCallJinja,
JinjaTemplate: true,
},
},
"functions": []functions.Function{},
"shouldUseFn": false,
"messages": []schema.Message{
{
Role: "user",
StringContent: "A long time ago in a galaxy far, far away...",
},
},
},
}
var _ = Describe("Templates", func() {
Context("chat message ChatML", func() {
var evaluator *Evaluator
BeforeEach(func() {
evaluator = NewEvaluator("")
})
for key := range chatMLTestMatch {
foo := chatMLTestMatch[key]
It("renders correctly `"+key+"`", func() {
templated := evaluator.TemplateMessages(foo["messages"].([]schema.Message), foo["config"].(*config.BackendConfig), foo["functions"].([]functions.Function), foo["shouldUseFn"].(bool))
Expect(templated).To(Equal(foo["expected"]), templated)
})
}
})
Context("chat message llama3", func() {
var evaluator *Evaluator
BeforeEach(func() {
evaluator = NewEvaluator("")
})
for key := range llama3TestMatch {
foo := llama3TestMatch[key]
It("renders correctly `"+key+"`", func() {
templated := evaluator.TemplateMessages(foo["messages"].([]schema.Message), foo["config"].(*config.BackendConfig), foo["functions"].([]functions.Function), foo["shouldUseFn"].(bool))
Expect(templated).To(Equal(foo["expected"]), templated)
})
}
})
Context("chat message jinja", func() {
var evaluator *Evaluator
BeforeEach(func() {
evaluator = NewEvaluator("")
})
for key := range jinjaTest {
foo := jinjaTest[key]
It("renders correctly `"+key+"`", func() {
templated := evaluator.TemplateMessages(foo["messages"].([]schema.Message), foo["config"].(*config.BackendConfig), foo["functions"].([]functions.Function), foo["shouldUseFn"].(bool))
Expect(templated).To(Equal(foo["expected"]), templated)
})
}
})
})