mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-25 00:59:28 -04:00
Compare commits
27 Commits
dependabot
...
feat/llama
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4e9bb4f879 | ||
|
|
3b47122e54 | ||
|
|
379fa3e525 | ||
|
|
e47c58656f | ||
|
|
482314c623 | ||
|
|
e8ae88a2a0 | ||
|
|
e1994579f8 | ||
|
|
e5620989dd | ||
|
|
fc618dcee6 | ||
|
|
e6042080c0 | ||
|
|
0f3b24436d | ||
|
|
4b6f911835 | ||
|
|
a5e28942a6 | ||
|
|
dba9cd7ca4 | ||
|
|
c93190de50 | ||
|
|
4dbf69f889 | ||
|
|
deb430f3ec | ||
|
|
dd8c8778e2 | ||
|
|
06a7b6cadb | ||
|
|
67c8889866 | ||
|
|
1d49041c85 | ||
|
|
2edc4e25b3 | ||
|
|
7888067914 | ||
|
|
9eedbf537a | ||
|
|
69c16481c8 | ||
|
|
56f8a6623f | ||
|
|
4755d676a3 |
@@ -17,19 +17,25 @@ if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
|
||||
rm -rf /LocalAI/backend/cpp/llama-cpp-*-build
|
||||
fi
|
||||
|
||||
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
if [ "${BUILD_TYPE}" = "hipblas" ]; then
|
||||
# ROCm: the GPU does the compute, so a single fallback CPU build is enough.
|
||||
make llama-cpp-fallback
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
else
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
make llama-cpp-avx
|
||||
make llama-cpp-avx2
|
||||
make llama-cpp-avx512
|
||||
make llama-cpp-fallback
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
# arm64: ggml's CPU_ALL_VARIANTS table includes armv9.2 SME variants whose
|
||||
# -march=...+sme is rejected by the Ubuntu 24.04 default gcc-13. gcc-14 accepts it, so
|
||||
# build the arm64 variants with gcc-14 (the host never *selects* SME unless it has it,
|
||||
# but every variant must still compile).
|
||||
if [ "${TARGETARCH}" = "arm64" ]; then
|
||||
apt-get update -qq && apt-get install -y -qq gcc-14 g++-14
|
||||
export CC=gcc-14 CXX=g++-14
|
||||
fi
|
||||
# x86 and arm64: one build with ggml CPU_ALL_VARIANTS replaces the per-microarch
|
||||
# binaries (x86: avx/avx2/avx512/fallback; arm64: armv8.x/armv9.x). ggml dlopens the
|
||||
# best libggml-cpu-*.so at runtime by probing host CPU features.
|
||||
make llama-cpp-cpu-all
|
||||
fi
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
|
||||
ccache -s || true
|
||||
|
||||
@@ -19,17 +19,19 @@ fi
|
||||
|
||||
cd /LocalAI/backend/cpp/turboquant
|
||||
|
||||
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then
|
||||
if [ "${BUILD_TYPE}" = "hipblas" ]; then
|
||||
# ROCm: single fallback CPU build (GPU does the compute).
|
||||
make turboquant-fallback
|
||||
make turboquant-grpc
|
||||
make turboquant-rpc-server
|
||||
else
|
||||
make turboquant-avx
|
||||
make turboquant-avx2
|
||||
make turboquant-avx512
|
||||
make turboquant-fallback
|
||||
make turboquant-grpc
|
||||
make turboquant-rpc-server
|
||||
# arm64: the CPU_ALL_VARIANTS armv9.2 SME variants need gcc-14 (gcc-13 rejects +sme).
|
||||
if [ "${TARGETARCH}" = "arm64" ]; then
|
||||
apt-get update -qq && apt-get install -y -qq gcc-14 g++-14
|
||||
export CC=gcc-14 CXX=g++-14
|
||||
fi
|
||||
# x86 and arm64: one ggml CPU_ALL_VARIANTS build replaces the per-microarch binaries.
|
||||
make turboquant-cpu-all
|
||||
fi
|
||||
make turboquant-grpc
|
||||
make turboquant-rpc-server
|
||||
|
||||
ccache -s || true
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=6c00e87ac84404af588ad2e65935bd6f079c696f
|
||||
IK_LLAMA_VERSION?=7ccf1d209588962b96eacca325b37e9b3e8faf5e
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -50,8 +50,13 @@ add_custom_command(
|
||||
"${hw_proto}"
|
||||
DEPENDS "${hw_proto}")
|
||||
|
||||
# hw_grpc_proto
|
||||
add_library(hw_grpc_proto
|
||||
# hw_grpc_proto: force STATIC. Under the CPU_ALL_VARIANTS build BUILD_SHARED_LIBS=ON
|
||||
# (ggml/llama become shared), which would otherwise make this glue library a DSO. As a
|
||||
# DSO it references the hidden-visibility symbols in the static libprotobuf.a, which the
|
||||
# linker cannot satisfy ("hidden symbol ... in libprotobuf.a is referenced by DSO").
|
||||
# Keeping it STATIC links protobuf/gRPC directly into the grpc-server executable while
|
||||
# only ggml/llama stay shared. No effect on the static variants (already BUILD_SHARED_LIBS=OFF).
|
||||
add_library(hw_grpc_proto STATIC
|
||||
${hw_grpc_srcs}
|
||||
${hw_grpc_hdrs}
|
||||
${hw_proto_srcs}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=7c082bc417bbe53210a83df4ba5b49e18ce6193c
|
||||
LLAMA_VERSION?=be4a6a63eb2b848e19c277bdcf2bd399e8af76d9
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
@@ -10,8 +10,16 @@ TARGET?=--target grpc-server
|
||||
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)
|
||||
ARCH?=$(shell uname -m)
|
||||
|
||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
|
||||
# Shared libs default to OFF: we link static gRPC and the avx/avx2/avx512/fallback
|
||||
# variants are fully static. The CPU_ALL_VARIANTS build flips SHARED_LIBS=ON (ggml/llama
|
||||
# become shared so the dynamic CPU backends work; gRPC stays static via its imported
|
||||
# targets). SHARED_LIBS is a make variable, not an appended -D, so it survives the
|
||||
# recursive sub-make into the VARIANT build dir (which re-parses this Makefile) instead
|
||||
# of being re-clobbered by a second -DBUILD_SHARED_LIBS=OFF. EXTRA_CMAKE_ARGS is the hook
|
||||
# the CPU_ALL_VARIANTS target uses to inject -DGGML_BACKEND_DL/-DGGML_CPU_ALL_VARIANTS.
|
||||
SHARED_LIBS?=OFF
|
||||
EXTRA_CMAKE_ARGS?=
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=$(SHARED_LIBS) -DLLAMA_CURL=OFF $(EXTRA_CMAKE_ARGS)
|
||||
|
||||
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
ifeq ($(NATIVE),false)
|
||||
@@ -120,6 +128,30 @@ llama-cpp-fallback: llama.cpp
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
|
||||
|
||||
# Single-build CPU backend using ggml's CPU_ALL_VARIANTS. Produces ONE grpc-server
|
||||
# plus a set of dlopen-able libggml-cpu-*.so (sandybridge/haswell/skylakex/...) that
|
||||
# ggml's backend registry selects from at runtime by probing host CPU features.
|
||||
# Replaces the avx/avx2/avx512/fallback multi-binary build on x86.
|
||||
#
|
||||
# CPU_ALL_VARIANTS requires GGML_BACKEND_DL, which requires BUILD_SHARED_LIBS=ON, so we
|
||||
# pass SHARED_LIBS=ON and the DL flags as make variables (NOT pre-expanded into the
|
||||
# CMAKE_ARGS env string): command-line make variables propagate through every recursive
|
||||
# sub-make, so the deepest VARIANT-dir build computes BUILD_SHARED_LIBS=ON consistently.
|
||||
# Only ggml/llama go shared - gRPC is found via its static imported targets, so the
|
||||
# grpc-server binary keeps static gRPC and only dynamically links ggml.
|
||||
#
|
||||
# TARGET adds "ggml": the per-microarch backends are runtime-dlopened, not link deps of
|
||||
# grpc-server, so they only build because each is an add_dependencies() of the ggml target.
|
||||
llama-cpp-cpu-all: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:cpu-all-variants${RESET})
|
||||
$(MAKE) SHARED_LIBS=ON EXTRA_CMAKE_ARGS="-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON" TARGET="--target grpc-server --target ggml" VARIANT="llama-cpp-cpu-all-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build/grpc-server llama-cpp-cpu-all
|
||||
rm -rf ggml-shared-libs && mkdir -p ggml-shared-libs
|
||||
find $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build/llama.cpp/build \( -name '*.so*' -o -name '*.dylib' \) -exec cp -av {} ggml-shared-libs/ \;
|
||||
@echo "Collected ggml shared backends:" && ls -la ggml-shared-libs/
|
||||
|
||||
llama-cpp-grpc: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
|
||||
|
||||
@@ -14,6 +14,22 @@ mkdir -p $CURDIR/package/lib
|
||||
cp -avrf $CURDIR/llama-cpp-* $CURDIR/package/
|
||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Bundle the ggml shared backends produced by the CPU_ALL_VARIANTS build (libggml-base.so,
|
||||
# libggml.so, libllama.so and the per-microarch libggml-cpu-*.so), all into package/lib.
|
||||
#
|
||||
# Two distinct resolution mechanisms both land here:
|
||||
# - NEEDED deps (libggml-base/libggml/libllama): resolved by the dynamic linker via the
|
||||
# LD_LIBRARY_PATH=$CURDIR/lib that run.sh exports.
|
||||
# - The per-microarch libggml-cpu-*.so are NOT linked; ggml *discovers* them at runtime by
|
||||
# scanning the executable's own directory (readlink /proc/self/exe). run.sh launches via
|
||||
# the bundled $CURDIR/lib/ld.so, so /proc/self/exe -> .../lib/ld.so and ggml scans lib/.
|
||||
# That is why the variants must sit in lib/ (next to ld.so), not just on the link path.
|
||||
# No-op on builds (arm64/darwin) that don't produce the all-variants set.
|
||||
if [ -d "$CURDIR/ggml-shared-libs" ]; then
|
||||
echo "Bundling ggml shared backends (CPU_ALL_VARIANTS)..."
|
||||
cp -avf $CURDIR/ggml-shared-libs/*.so* $CURDIR/package/lib/
|
||||
fi
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
# x86_64 architecture
|
||||
|
||||
@@ -12,26 +12,11 @@ grep -e "flags" /proc/cpuinfo | head -1
|
||||
|
||||
BINARY=llama-cpp-fallback
|
||||
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx ]; then
|
||||
BINARY=llama-cpp-avx
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx2 ]; then
|
||||
BINARY=llama-cpp-avx2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx512 ]; then
|
||||
BINARY=llama-cpp-avx512
|
||||
fi
|
||||
# x86 ships a single llama-cpp-cpu-all built with ggml CPU_ALL_VARIANTS: ggml's backend
|
||||
# registry dlopens the best libggml-cpu-*.so for this host, so no shell-side AVX probing.
|
||||
# arm64/darwin builds ship only llama-cpp-fallback, so fall back to it when cpu-all absent.
|
||||
if [ -e $CURDIR/llama-cpp-cpu-all ]; then
|
||||
BINARY=llama-cpp-cpu-all
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
|
||||
|
||||
@@ -65,6 +65,29 @@ turboquant-avx:
|
||||
turboquant-fallback:
|
||||
$(call turboquant-build,fallback,-DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off,--target grpc-server)
|
||||
|
||||
# Single-build CPU backend via ggml CPU_ALL_VARIANTS (mirrors llama-cpp-cpu-all).
|
||||
# turboquant reuses backend/cpp/llama-cpp's CMakeLists.txt (hw_grpc_proto STATIC) and
|
||||
# Makefile (SHARED_LIBS make-var + EXTRA_CMAKE_ARGS), so this passes the same overrides
|
||||
# through to the copied build: SHARED_LIBS=ON, the DL flags, and --target ggml (which
|
||||
# pulls in the per-microarch libggml-cpu-*.so via ggml's add_dependencies). The .so set
|
||||
# is collected for package.sh to bundle into package/lib.
|
||||
turboquant-cpu-all:
|
||||
rm -rf $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build
|
||||
cp -rf $(LLAMA_CPP_DIR) $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build purge
|
||||
bash $(CURRENT_MAKEFILE_DIR)/patch-grpc-server.sh $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/grpc-server.cpp
|
||||
$(info $(GREEN)I turboquant build info:cpu-all-variants$(RESET))
|
||||
LLAMA_REPO=$(LLAMA_REPO) LLAMA_VERSION=$(TURBOQUANT_VERSION) \
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build llama.cpp
|
||||
bash $(CURRENT_MAKEFILE_DIR)/apply-patches.sh $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/llama.cpp $(PATCHES_DIR)
|
||||
SHARED_LIBS=ON EXTRA_CMAKE_ARGS="-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON" TARGET="--target grpc-server --target ggml" \
|
||||
LLAMA_REPO=$(LLAMA_REPO) LLAMA_VERSION=$(TURBOQUANT_VERSION) \
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/grpc-server turboquant-cpu-all
|
||||
rm -rf ggml-shared-libs && mkdir -p ggml-shared-libs
|
||||
find $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/llama.cpp/build \( -name '*.so*' -o -name '*.dylib' \) -exec cp -av {} ggml-shared-libs/ \;
|
||||
@echo "Collected ggml shared backends:" && ls -la ggml-shared-libs/
|
||||
|
||||
turboquant-grpc:
|
||||
$(call turboquant-build,grpc,-DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off,--target grpc-server --target rpc-server)
|
||||
|
||||
|
||||
@@ -14,6 +14,15 @@ mkdir -p $CURDIR/package/lib
|
||||
cp -avrf $CURDIR/turboquant-* $CURDIR/package/
|
||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Bundle the ggml shared backends from the CPU_ALL_VARIANTS build into package/lib. ggml
|
||||
# discovers the per-microarch libggml-cpu-*.so by scanning the executable directory, which
|
||||
# (via the bundled lib/ld.so that run.sh launches through) resolves to lib/. See the
|
||||
# matching comment in backend/cpp/llama-cpp/package.sh. No-op on the fallback/ROCm builds.
|
||||
if [ -d "$CURDIR/ggml-shared-libs" ]; then
|
||||
echo "Bundling ggml shared backends (CPU_ALL_VARIANTS)..."
|
||||
cp -avf $CURDIR/ggml-shared-libs/*.so* $CURDIR/package/lib/
|
||||
fi
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
# x86_64 architecture
|
||||
|
||||
@@ -12,26 +12,11 @@ grep -e "flags" /proc/cpuinfo | head -1
|
||||
|
||||
BINARY=turboquant-fallback
|
||||
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/turboquant-avx ]; then
|
||||
BINARY=turboquant-avx
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/turboquant-avx2 ]; then
|
||||
BINARY=turboquant-avx2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/turboquant-avx512 ]; then
|
||||
BINARY=turboquant-avx512
|
||||
fi
|
||||
# x86/arm64 ship a single turboquant-cpu-all built with ggml CPU_ALL_VARIANTS: ggml's
|
||||
# backend registry dlopens the best libggml-cpu-*.so for this host, so no shell-side
|
||||
# probing. ROCm ships only turboquant-fallback, so fall back to it when cpu-all is absent.
|
||||
if [ -e $CURDIR/turboquant-cpu-all ]; then
|
||||
BINARY=turboquant-cpu-all
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# CrispASR version (release tag)
|
||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||
CRISPASR_VERSION?=7a8cb80907341c0204bd0488c1244764f4163883
|
||||
CRISPASR_VERSION?=96b2a6ee31d30389fed8a7ef1a54239b75231ddc
|
||||
SO_TARGET?=libgocrispasr.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# parakeet-cpp backend Makefile.
|
||||
#
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
||||
# (.github/bump_deps.sh) can find and update it - matches the
|
||||
# whisper.cpp / ds4 / vibevoice-cpp convention.
|
||||
#
|
||||
@@ -15,7 +15,7 @@
|
||||
# That's what the L0 smoke test uses. The default target below does the
|
||||
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
|
||||
|
||||
PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
|
||||
PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
||||
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
|
||||
|
||||
GOCMD?=go
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=b12098f5d09fc83da36e65c784f7bdb16a5a5ebf
|
||||
STABLEDIFFUSION_GGML_VERSION?=f440ad9c29dd8bc34e5d1f4b863832b96d6ea05f
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=5ed76e9a079962f1c85cfce44edd325c27ef1f97
|
||||
WHISPER_CPP_VERSION?=43d78af5be58f41d6ffbc227d608f104577741ea
|
||||
SO_TARGET?=libgowhisper.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -215,6 +215,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
envBackendGalleries := slices.Equal(appConfig.BackendGalleries, startupAppConfig.BackendGalleries)
|
||||
envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries
|
||||
envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries
|
||||
envPIIDefaultDetectors := slices.Equal(appConfig.PIIDefaultDetectors, startupAppConfig.PIIDefaultDetectors)
|
||||
envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays
|
||||
envForceEvictionWhenBusy := appConfig.ForceEvictionWhenBusy == startupAppConfig.ForceEvictionWhenBusy
|
||||
envLRUEvictionMaxRetries := appConfig.LRUEvictionMaxRetries == startupAppConfig.LRUEvictionMaxRetries
|
||||
@@ -335,6 +336,15 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries {
|
||||
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
|
||||
}
|
||||
if settings.PIIDefaultDetectors != nil && !envPIIDefaultDetectors {
|
||||
// Request-side default redaction reads this live via
|
||||
// ResolvePIIPolicy, so a file edit takes effect on the next chat
|
||||
// request. The MITM listener resolves its per-host detector map
|
||||
// once at start, so a raw file edit reaches cloud-proxy traffic
|
||||
// only after a restart or a POST /api/settings (which rebuilds
|
||||
// the listener) — the admin UI uses the latter.
|
||||
appConfig.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
|
||||
}
|
||||
if settings.AutoUpgradeBackends != nil {
|
||||
appConfig.AutoUpgradeBackends = *settings.AutoUpgradeBackends
|
||||
}
|
||||
|
||||
@@ -109,6 +109,52 @@ var _ = Describe("loadRuntimeSettingsFromFile", func() {
|
||||
})
|
||||
})
|
||||
|
||||
// Instance-wide default PII detectors. The file is the only source (no
|
||||
// env var), and the loader runs immediately before startMITMIfConfigured,
|
||||
// so a regression here means the cloud-proxy MITM listener resolves an
|
||||
// empty detector set at boot and forwards intercepted traffic unredacted —
|
||||
// even though pii_default_detectors is on disk and the MITM model has PII
|
||||
// enabled. It also breaks request-side default redaction the same way.
|
||||
Describe("PII default detectors", func() {
|
||||
It("loads pii_default_detectors from the file", func() {
|
||||
cfg := &config.ApplicationConfig{DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["privacy-filter-nemotron", "secret-filter"]}`)}
|
||||
loadRuntimeSettingsFromFile(cfg)
|
||||
Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"privacy-filter-nemotron", "secret-filter"}))
|
||||
})
|
||||
|
||||
It("does not override an env/CLI-set value (LOCALAI_PII_DEFAULT_DETECTORS)", func() {
|
||||
cfg := &config.ApplicationConfig{
|
||||
DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["from-file"]}`),
|
||||
PIIDefaultDetectors: []string{"from-env"}, // simulate WithPIIDefaultDetectors(env)
|
||||
}
|
||||
loadRuntimeSettingsFromFile(cfg)
|
||||
Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env var must win over the persisted file value")
|
||||
})
|
||||
})
|
||||
|
||||
// The live file watcher applies pii_default_detectors on a runtime change
|
||||
// the same way it handles galleries/threads/etc.: env-set values (current
|
||||
// == startup snapshot) are left alone, otherwise the file value is applied
|
||||
// to the live config so request-side default redaction picks it up without
|
||||
// a restart.
|
||||
Describe("file watcher: pii_default_detectors", func() {
|
||||
It("applies a changed file value to the live config", func() {
|
||||
startup := config.ApplicationConfig{} // no env baseline
|
||||
live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"old"}}
|
||||
handler := readRuntimeSettingsJson(startup)
|
||||
Expect(handler([]byte(`{"pii_default_detectors":["new-a","new-b"]}`), live)).To(Succeed())
|
||||
Expect(live.PIIDefaultDetectors).To(Equal([]string{"new-a", "new-b"}))
|
||||
})
|
||||
|
||||
It("leaves an env-controlled value untouched", func() {
|
||||
startup := config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
|
||||
live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
|
||||
handler := readRuntimeSettingsJson(startup)
|
||||
Expect(handler([]byte(`{"pii_default_detectors":["from-file"]}`), live)).To(Succeed())
|
||||
Expect(live.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env-controlled detectors must not be overwritten by the file")
|
||||
})
|
||||
})
|
||||
|
||||
// The Agent Pool block has a mix of zero and non-zero defaults
|
||||
// (Enabled=true, EmbeddingModel="granite-...", MaxChunkingSize=400,
|
||||
// VectorEngine="chromem", AgentHubURL="https://agenthub.localai.io").
|
||||
|
||||
@@ -750,6 +750,20 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
|
||||
options.MITMListen = *settings.MITMListen
|
||||
}
|
||||
|
||||
// Instance-wide default PII detectors. LOCALAI_PII_DEFAULT_DETECTORS (via
|
||||
// WithPIIDefaultDetectors) wins when set; otherwise the file is the source
|
||||
// — apply it only when the env/CLI left the value empty, mirroring the
|
||||
// "env > file" precedence used for the other fields. This must land before
|
||||
// startMITMIfConfigured (called right after this loader): the cloud-proxy
|
||||
// listener resolves each intercept host's detectors once at start via
|
||||
// ResolvePIIPolicy, and a MITM model that names no detectors of its own
|
||||
// falls back to these defaults. Without it the listener (and request-side
|
||||
// default redaction) starts with an empty detector set and forwards
|
||||
// traffic unredacted even though pii_default_detectors is on disk.
|
||||
if settings.PIIDefaultDetectors != nil && len(options.PIIDefaultDetectors) == 0 {
|
||||
options.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
|
||||
}
|
||||
|
||||
// Backend upgrade flags
|
||||
if settings.AutoUpgradeBackends != nil {
|
||||
if !options.AutoUpgradeBackends {
|
||||
|
||||
@@ -181,6 +181,8 @@ type RunCMD struct {
|
||||
// Cloud-proxy MITM listener (off by default).
|
||||
MITMListen string `env:"LOCALAI_MITM_LISTEN" help:"Address (host:port) for the cloudproxy MITM listener. Empty = disabled. Clients set HTTPS_PROXY=http://<this>:<port>. Intercept hosts are declared per-model via the model YAML mitm.hosts: block; create one from the Add Model UI." group:"middleware"`
|
||||
MITMCADir string `env:"LOCALAI_MITM_CA_DIR" type:"path" help:"Directory holding the MITM proxy CA cert + key. Defaults to <data-path>/mitm-ca." group:"middleware"`
|
||||
|
||||
PIIDefaultDetectors []string `env:"LOCALAI_PII_DEFAULT_DETECTORS" help:"Instance-wide default PII/secret detector model names applied to any PII-enabled model (chiefly cloud-proxy / MITM models) that names no pii.detectors of its own. Comma-separated, e.g. privacy-filter-nemotron,secret-filter. Takes precedence over the value persisted via the Middleware UI." group:"middleware"`
|
||||
}
|
||||
|
||||
func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
@@ -243,6 +245,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
config.WithAPIAddress(r.Address),
|
||||
config.WithMITMListen(r.MITMListen),
|
||||
config.WithMITMCADir(r.MITMCADir),
|
||||
config.WithPIIDefaultDetectors(r.PIIDefaultDetectors),
|
||||
config.WithAgentJobRetentionDays(r.AgentJobRetentionDays),
|
||||
config.WithLlamaCPPTunnelCallback(func(tunnels []string) {
|
||||
tunnelEnvVar := strings.Join(tunnels, ",")
|
||||
|
||||
@@ -712,6 +712,18 @@ func WithMITMCADir(dir string) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
// WithPIIDefaultDetectors sets the instance-wide default PII/secret detector
|
||||
// model names applied to any PII-enabled model (chiefly cloud-proxy / MITM
|
||||
// models) that names no pii.detectors of its own. CLI/env:
|
||||
// LOCALAI_PII_DEFAULT_DETECTORS. Empty leaves the value to
|
||||
// runtime_settings.json / the Middleware UI; a non-empty value takes
|
||||
// precedence over the file (env > file).
|
||||
func WithPIIDefaultDetectors(detectors []string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.PIIDefaultDetectors = detectors
|
||||
}
|
||||
}
|
||||
|
||||
func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.DynamicConfigsDir = dynamicConfigsDir
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
// runtimeSettingsFile is the on-disk filename inside DynamicConfigsDir.
|
||||
@@ -33,6 +34,35 @@ func (o *ApplicationConfig) ReadPersistedSettings() (RuntimeSettings, error) {
|
||||
return settings, nil
|
||||
}
|
||||
|
||||
// MergeNonNil overlays every set (non-nil) field of overlay onto the
|
||||
// receiver, leaving the receiver's value untouched wherever overlay left a
|
||||
// field unset. Every RuntimeSettings field is a pointer precisely so "set"
|
||||
// can be told apart from "absent" (see the type doc), which makes this a
|
||||
// faithful partial update: a caller that submits only the field it owns
|
||||
// changes exactly that field and never clobbers unrelated settings.
|
||||
//
|
||||
// This is the read-modify-write contract the persistence helpers exist for.
|
||||
// UpdateSettingsEndpoint reads the on-disk settings, merges the request body
|
||||
// on top, and writes the result — so a focused admin page that POSTs only its
|
||||
// own field (the Middleware page sends only mitm_listen; the detector table
|
||||
// only pii_default_detectors) no longer nulls every other setting.
|
||||
//
|
||||
// Reflection keeps the merge total over the struct: a field added to
|
||||
// RuntimeSettings later is merged automatically, so the persistence path can
|
||||
// never silently drop a new setting the way a hand-maintained field list
|
||||
// would. Non-pointer fields (none today) are skipped — they cannot express
|
||||
// "absent", so the receiver wins.
|
||||
func (s *RuntimeSettings) MergeNonNil(overlay RuntimeSettings) {
|
||||
dst := reflect.ValueOf(s).Elem()
|
||||
src := reflect.ValueOf(overlay)
|
||||
for i := 0; i < src.NumField(); i++ {
|
||||
f := src.Field(i)
|
||||
if f.Kind() == reflect.Pointer && !f.IsNil() {
|
||||
dst.Field(i).Set(f)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// WritePersistedSettings serialises the given RuntimeSettings to
|
||||
// runtime_settings.json with restricted permissions (it may carry API
|
||||
// keys and P2P tokens).
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
)
|
||||
|
||||
func strPtr(s string) *string { return &s }
|
||||
func boolPtr(b bool) *bool { return &b }
|
||||
|
||||
var _ = Describe("RuntimeSettings persistence helpers", func() {
|
||||
var (
|
||||
@@ -51,6 +52,47 @@ var _ = Describe("RuntimeSettings persistence helpers", func() {
|
||||
})
|
||||
})
|
||||
|
||||
// MergeNonNil is the partial-update primitive UpdateSettingsEndpoint
|
||||
// relies on: a focused admin page POSTs only the field it owns, and the
|
||||
// handler reads the on-disk settings and overlays the request on top.
|
||||
// Without it, the body would be written verbatim and every field the
|
||||
// caller omitted would be nulled (the reported regression: changing
|
||||
// mitm_listen wiped the galleries, api keys, watchdog config, etc.).
|
||||
Describe("MergeNonNil partial update", func() {
|
||||
It("overlays set fields and preserves unset ones", func() {
|
||||
base := config.RuntimeSettings{
|
||||
MITMListen: strPtr(":9000"),
|
||||
Galleries: &[]config.Gallery{{Name: "g1", URL: "http://example/g1"}},
|
||||
WatchdogIdleEnabled: boolPtr(true),
|
||||
ApiKeys: &[]string{"persisted-key"},
|
||||
PIIDefaultDetectors: &[]string{"det-a"},
|
||||
}
|
||||
|
||||
// Simulate the Middleware proxy tab: only mitm_listen is sent.
|
||||
overlay := config.RuntimeSettings{MITMListen: strPtr(":8443")}
|
||||
base.MergeNonNil(overlay)
|
||||
|
||||
Expect(base.MITMListen).ToNot(BeNil())
|
||||
Expect(*base.MITMListen).To(Equal(":8443"), "set field should be overlaid")
|
||||
// Everything the overlay left unset must survive untouched.
|
||||
Expect(base.Galleries).ToNot(BeNil(), "galleries were clobbered")
|
||||
Expect(*base.Galleries).To(HaveLen(1))
|
||||
Expect(base.WatchdogIdleEnabled).ToNot(BeNil())
|
||||
Expect(*base.WatchdogIdleEnabled).To(BeTrue())
|
||||
Expect(base.ApiKeys).ToNot(BeNil(), "api_keys were clobbered")
|
||||
Expect(*base.ApiKeys).To(Equal([]string{"persisted-key"}))
|
||||
Expect(base.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were clobbered")
|
||||
Expect(*base.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
|
||||
})
|
||||
|
||||
It("lets an explicit empty slice clear a field", func() {
|
||||
base := config.RuntimeSettings{PIIDefaultDetectors: &[]string{"det-a"}}
|
||||
base.MergeNonNil(config.RuntimeSettings{PIIDefaultDetectors: &[]string{}})
|
||||
Expect(base.PIIDefaultDetectors).ToNot(BeNil())
|
||||
Expect(*base.PIIDefaultDetectors).To(BeEmpty(), "an explicit empty slice should clear, not preserve")
|
||||
})
|
||||
})
|
||||
|
||||
// MITM round trip pins the contract that loadRuntimeSettingsFromFile
|
||||
// MITM listener address must survive a write/read round trip so the
|
||||
// next process restart can bring the listener back up. (Intercept
|
||||
|
||||
@@ -70,7 +70,7 @@ func UploadToCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
file, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": "file required"})
|
||||
@@ -116,7 +116,7 @@ func ListCollectionEntriesEndpoint(app *application.Application) echo.HandlerFun
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
entries, err := svc.ListCollectionEntriesForUser(userID, c.Param("name"))
|
||||
entries, err := svc.ListCollectionEntriesForUser(userID, decodedParam(c, "name"))
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -139,7 +139,7 @@ func GetCollectionEntryContentEndpoint(app *application.Application) echo.Handle
|
||||
if err != nil {
|
||||
entry = entryParam
|
||||
}
|
||||
content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, c.Param("name"), entry)
|
||||
content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, decodedParam(c, "name"), entry)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -164,7 +164,7 @@ func SearchCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
if err := c.Bind(&payload); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
}
|
||||
results, err := svc.SearchCollectionForUser(userID, c.Param("name"), payload.Query, payload.MaxResults)
|
||||
results, err := svc.SearchCollectionForUser(userID, decodedParam(c, "name"), payload.Query, payload.MaxResults)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -182,7 +182,7 @@ func ResetCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
if err := svc.ResetCollectionForUser(userID, c.Param("name")); err != nil {
|
||||
if err := svc.ResetCollectionForUser(userID, decodedParam(c, "name")); err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -202,7 +202,7 @@ func DeleteCollectionEntryEndpoint(app *application.Application) echo.HandlerFun
|
||||
if err := c.Bind(&payload); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
}
|
||||
remaining, err := svc.DeleteCollectionEntryForUser(userID, c.Param("name"), payload.Entry)
|
||||
remaining, err := svc.DeleteCollectionEntryForUser(userID, decodedParam(c, "name"), payload.Entry)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -230,7 +230,7 @@ func AddCollectionSourceEndpoint(app *application.Application) echo.HandlerFunc
|
||||
if payload.UpdateInterval < 1 {
|
||||
payload.UpdateInterval = 60
|
||||
}
|
||||
if err := svc.AddCollectionSourceForUser(userID, c.Param("name"), payload.URL, payload.UpdateInterval); err != nil {
|
||||
if err := svc.AddCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL, payload.UpdateInterval); err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -250,7 +250,7 @@ func RemoveCollectionSourceEndpoint(app *application.Application) echo.HandlerFu
|
||||
if err := c.Bind(&payload); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
}
|
||||
if err := svc.RemoveCollectionSourceForUser(userID, c.Param("name"), payload.URL); err != nil {
|
||||
if err := svc.RemoveCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL); err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
||||
}
|
||||
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
||||
@@ -267,7 +267,7 @@ func GetCollectionEntryRawFileEndpoint(app *application.Application) echo.Handle
|
||||
if err != nil {
|
||||
entry = entryParam
|
||||
}
|
||||
fpath, err := svc.GetCollectionEntryFilePathForUser(userID, c.Param("name"), entry)
|
||||
fpath, err := svc.GetCollectionEntryFilePathForUser(userID, decodedParam(c, "name"), entry)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -282,7 +282,7 @@ func ListCollectionSourcesEndpoint(app *application.Application) echo.HandlerFun
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
sources, err := svc.ListCollectionSourcesForUser(userID, c.Param("name"))
|
||||
sources, err := svc.ListCollectionSourcesForUser(userID, decodedParam(c, "name"))
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
|
||||
49
core/http/endpoints/localai/agent_collections_param_test.go
Normal file
49
core/http/endpoints/localai/agent_collections_param_test.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Regression for #10443: agent/collection names carry a "legacy-api-key:"
|
||||
// prefix, so the ':' is percent-encoded as %3A in the request path. Echo routes
|
||||
// such paths via URL.RawPath and stores the path-param value still escaped, so
|
||||
// handlers must URL-decode it before looking the collection up in the store -
|
||||
// otherwise the lookup sees "legacy-api-key%3ALiteraryResearch" and 404s.
|
||||
var _ = Describe("decodedParam", func() {
|
||||
var e *echo.Echo
|
||||
|
||||
BeforeEach(func() {
|
||||
e = echo.New()
|
||||
})
|
||||
|
||||
// route runs a request through Echo's real router so the path param is
|
||||
// populated exactly as it would be in production, then returns the decoded
|
||||
// value the handler would observe.
|
||||
route := func(rawPath string) string {
|
||||
var got string
|
||||
e.GET("/api/agents/collections/:name/upload", func(c echo.Context) error {
|
||||
got = decodedParam(c, "name")
|
||||
return c.NoContent(http.StatusOK)
|
||||
})
|
||||
req := httptest.NewRequest(http.MethodGet, rawPath, nil)
|
||||
rec := httptest.NewRecorder()
|
||||
e.ServeHTTP(rec, req)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK))
|
||||
return got
|
||||
}
|
||||
|
||||
It("decodes a percent-encoded colon in the collection name", func() {
|
||||
got := route("/api/agents/collections/legacy-api-key%3ALiteraryResearch/upload")
|
||||
Expect(got).To(Equal("legacy-api-key:LiteraryResearch"))
|
||||
})
|
||||
|
||||
It("leaves an unencoded name untouched", func() {
|
||||
got := route("/api/agents/collections/PlainCollection/upload")
|
||||
Expect(got).To(Equal("PlainCollection"))
|
||||
})
|
||||
})
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"io"
|
||||
"maps"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
@@ -33,6 +34,22 @@ func getUserID(c echo.Context) string {
|
||||
return user.ID
|
||||
}
|
||||
|
||||
// decodedParam returns the named path parameter, URL-decoding it.
|
||||
//
|
||||
// Echo routes a request via URL.RawPath whenever the path contains
|
||||
// percent-encoded characters (e.g. %3A for ':'), and in that case stores the
|
||||
// matched path-param value raw/escaped. Agent and collection names carry a
|
||||
// "legacy-api-key:" prefix, so the ':' arrives as %3A and the raw param no
|
||||
// longer matches the stored name. Callers must unescape before lookups.
|
||||
// Falls back to the raw value if it isn't valid percent-encoding.
|
||||
func decodedParam(c echo.Context, name string) string {
|
||||
raw := c.Param(name)
|
||||
if decoded, err := url.PathUnescape(raw); err == nil {
|
||||
return decoded
|
||||
}
|
||||
return raw
|
||||
}
|
||||
|
||||
// isAdminUser returns true if the authenticated user has admin role.
|
||||
func isAdminUser(c echo.Context) bool {
|
||||
user := auth.GetUser(c)
|
||||
@@ -127,7 +144,7 @@ func GetAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
|
||||
statuses := svc.ListAgentsForUser(userID)
|
||||
active, exists := statuses[name]
|
||||
@@ -142,7 +159,7 @@ func UpdateAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
var cfg state.AgentConfig
|
||||
if err := c.Bind(&cfg); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
@@ -161,7 +178,7 @@ func DeleteAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
if err := svc.DeleteAgentForUser(userID, name); err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -173,7 +190,7 @@ func GetAgentConfigEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
cfg := svc.GetAgentConfigForUser(userID, name)
|
||||
if cfg == nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": "Agent not found"})
|
||||
@@ -186,7 +203,7 @@ func PauseAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
if err := svc.PauseAgentForUser(userID, c.Param("name")); err != nil {
|
||||
if err := svc.PauseAgentForUser(userID, decodedParam(c, "name")); err != nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
||||
@@ -197,7 +214,7 @@ func ResumeAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
if err := svc.ResumeAgentForUser(userID, c.Param("name")); err != nil {
|
||||
if err := svc.ResumeAgentForUser(userID, decodedParam(c, "name")); err != nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
||||
@@ -208,7 +225,7 @@ func GetAgentStatusEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
|
||||
history := svc.GetAgentStatusForUser(userID, name)
|
||||
if history == nil {
|
||||
@@ -241,7 +258,7 @@ func GetAgentObservablesEndpoint(app *application.Application) echo.HandlerFunc
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
|
||||
history, err := svc.GetAgentObservablesForUser(userID, name)
|
||||
if err != nil {
|
||||
@@ -261,7 +278,7 @@ func ClearAgentObservablesEndpoint(app *application.Application) echo.HandlerFun
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
if err := svc.ClearAgentObservablesForUser(userID, name); err != nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -273,7 +290,7 @@ func ChatWithAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
var payload struct {
|
||||
Message string `json:"message"`
|
||||
}
|
||||
@@ -302,7 +319,7 @@ func AgentSSEEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
|
||||
// Try local SSE manager first
|
||||
manager := svc.GetSSEManagerForUser(userID, name)
|
||||
@@ -334,7 +351,7 @@ func ExportAgentEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
data, err := svc.ExportAgentForUser(userID, name)
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
|
||||
@@ -4,8 +4,6 @@ import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
@@ -110,6 +108,18 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
})
|
||||
}
|
||||
|
||||
// Read whatever is already persisted: it is both the source of truth
|
||||
// for branding asset filenames (below) and the base we merge this
|
||||
// request onto before writing. A read failure must not let a Save
|
||||
// silently discard the existing settings — surface it instead.
|
||||
persisted, err := appConfig.ReadPersistedSettings()
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to read existing settings: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
// Branding asset filenames are owned exclusively by
|
||||
// /api/branding/asset/{kind} (upload/delete). The Settings page also
|
||||
// round-trips them via GET /api/settings, but its local state is stale
|
||||
@@ -118,11 +128,9 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
// at page open. Replace whatever the body sent for these three fields
|
||||
// with the values currently on disk so /api/settings can never
|
||||
// regress them.
|
||||
if existing, err := appConfig.ReadPersistedSettings(); err == nil {
|
||||
settings.LogoFile = existing.LogoFile
|
||||
settings.LogoHorizontalFile = existing.LogoHorizontalFile
|
||||
settings.FaviconFile = existing.FaviconFile
|
||||
}
|
||||
settings.LogoFile = persisted.LogoFile
|
||||
settings.LogoHorizontalFile = persisted.LogoHorizontalFile
|
||||
settings.FaviconFile = persisted.FaviconFile
|
||||
|
||||
// The UI reads ApiKeys from GET /api/settings, which already returns the
|
||||
// merged env+runtime list. When the user clicks Save, the same merged
|
||||
@@ -145,16 +153,17 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
settings.ApiKeys = &runtimeOnly
|
||||
}
|
||||
|
||||
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
|
||||
settingsJSON, err := json.MarshalIndent(settings, "", " ")
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to marshal settings: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
|
||||
// Persist as a partial update: overlay only the fields this request set
|
||||
// onto the settings already on disk. Focused admin pages POST just the
|
||||
// keys they own (the Middleware proxy tab sends only mitm_listen; the
|
||||
// detector table only pii_default_detectors), so writing the request
|
||||
// body verbatim would null every unrelated setting (the no-omitempty
|
||||
// api_keys / pii_default_detectors fields even round-trip as JSON
|
||||
// null). The full Settings page still round-trips every field, so its
|
||||
// Save is unchanged.
|
||||
toPersist := persisted
|
||||
toPersist.MergeNonNil(settings)
|
||||
if err := appConfig.WritePersistedSettings(toPersist); err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
Success: false,
|
||||
Error: "Failed to write settings file: " + err.Error(),
|
||||
@@ -262,7 +271,14 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
}
|
||||
}
|
||||
|
||||
if settings.MITMListen != nil {
|
||||
// Rebuild the MITM listener when its address OR the instance-wide
|
||||
// default detectors change. The per-host detector map is resolved once
|
||||
// at listener start (startMITMLocked → ResolvePIIPolicy), so a
|
||||
// default-detector change is otherwise invisible to cloud-proxy traffic
|
||||
// until the next restart — an admin toggling a default detector would
|
||||
// see no redaction. RestartMITM is a no-op when the listener is
|
||||
// disabled (empty address).
|
||||
if settings.MITMListen != nil || settings.PIIDefaultDetectors != nil {
|
||||
if err := app.RestartMITM(); err != nil {
|
||||
xlog.Error("Failed to restart MITM proxy", "error", err)
|
||||
return c.JSON(http.StatusInternalServerError, schema.SettingsResponse{
|
||||
|
||||
@@ -52,6 +52,10 @@ var _ = Describe("Settings endpoints", func() {
|
||||
// Settings are persisted here; set after construction since there's no
|
||||
// dedicated AppOption for it.
|
||||
app.ApplicationConfig().DynamicConfigsDir = tmp
|
||||
// Contain the MITM CA inside tmp too. The partial-save spec flips
|
||||
// mitm_listen, which starts the listener and writes a CA; without this
|
||||
// it defaults to ./mitm-ca and litters the package source tree.
|
||||
app.ApplicationConfig().MITMCADir = filepath.Join(tmp, "mitm-ca")
|
||||
|
||||
e = echo.New()
|
||||
e.GET("/api/settings", GetSettingsEndpoint(app))
|
||||
@@ -109,6 +113,57 @@ var _ = Describe("Settings endpoints", func() {
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
})
|
||||
|
||||
// Regression: a focused admin page (the Middleware proxy tab) POSTs only
|
||||
// the one field it owns — mitm_listen. The old handler wrote the request
|
||||
// body verbatim, so every other persisted setting was dropped (and
|
||||
// api_keys / pii_default_detectors, which lack omitempty, were written as
|
||||
// null). A partial POST must now merge onto what is already on disk.
|
||||
It("preserves unrelated persisted settings when a partial POST sets only mitm_listen", func() {
|
||||
// First save establishes a fuller settings file (as the full Settings
|
||||
// page would): galleries, an API key, and the MITM listener. The
|
||||
// listener restart binds a real socket, so use 127.0.0.1:0 for an
|
||||
// ephemeral free port rather than a fixed one that may be in use.
|
||||
rec := post(`{"mitm_listen":"127.0.0.1:0","galleries":[{"name":"g1","url":"http://example/g1"}],"api_keys":["k1"],"pii_default_detectors":["det-a"]}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
|
||||
// The Middleware proxy tab then changes only the listen address — the
|
||||
// exact partial body that nulled everything else before the fix.
|
||||
rec = post(`{"mitm_listen":"127.0.0.1:0"}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
|
||||
raw, err := os.ReadFile(filepath.Join(tmp, "runtime_settings.json"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
var ondisk config.RuntimeSettings
|
||||
Expect(json.Unmarshal(raw, &ondisk)).To(Succeed())
|
||||
|
||||
Expect(ondisk.MITMListen).ToNot(BeNil())
|
||||
Expect(*ondisk.MITMListen).To(Equal("127.0.0.1:0"), "the changed field should be saved")
|
||||
Expect(ondisk.Galleries).ToNot(BeNil(), "galleries were clobbered by the partial save")
|
||||
Expect(*ondisk.Galleries).To(HaveLen(1))
|
||||
Expect(ondisk.ApiKeys).ToNot(BeNil(), "api_keys were nulled by the partial save")
|
||||
Expect(*ondisk.ApiKeys).To(Equal([]string{"k1"}))
|
||||
Expect(ondisk.PIIDefaultDetectors).ToNot(BeNil(), "pii_default_detectors were nulled by the partial save")
|
||||
Expect(*ondisk.PIIDefaultDetectors).To(Equal([]string{"det-a"}))
|
||||
})
|
||||
|
||||
// The MITM listener resolves its per-host PII detectors once at start
|
||||
// (startMITMLocked → ResolvePIIPolicy), and the handler used to restart it
|
||||
// only when mitm_listen changed. So an admin toggling a default detector
|
||||
// (the Middleware detector table POSTs only pii_default_detectors) left
|
||||
// cloud-proxy traffic unredacted until the next reboot. A
|
||||
// pii_default_detectors change must now rebuild the listener.
|
||||
It("rebuilds the MITM listener when only pii_default_detectors changes", func() {
|
||||
rec := post(`{"mitm_listen":"127.0.0.1:0"}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
srv1 := app.MITMServer()
|
||||
Expect(srv1).ToNot(BeNil(), "listener should be running after mitm_listen is set")
|
||||
|
||||
rec = post(`{"pii_default_detectors":["det-a"]}`)
|
||||
Expect(rec.Code).To(Equal(http.StatusOK), rec.Body.String())
|
||||
Expect(app.MITMServer()).ToNot(BeIdenticalTo(srv1),
|
||||
"a default-detector change must restart the listener so it picks up the new detectors")
|
||||
})
|
||||
|
||||
// Residual #9125: enabling the watchdog from a cold (off) state via the
|
||||
// React master toggle must start the live watchdog immediately, without a
|
||||
// restart. The toggle posts watchdog_idle_enabled/busy_enabled=true while
|
||||
|
||||
@@ -432,7 +432,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL
|
||||
if pipeline.SoundDetection == "" {
|
||||
return nil, nil
|
||||
}
|
||||
cfg, err := cl.LoadModelConfigFileByName(pipeline.SoundDetection, ml.ModelPath)
|
||||
cfg, err := loadPipelineSubModel(cl, pipeline.SoundDetection, ml.ModelPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load sound detection config: %w", err)
|
||||
}
|
||||
@@ -443,7 +443,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL
|
||||
}
|
||||
|
||||
func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (Model, *config.ModelConfig, error) {
|
||||
cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath)
|
||||
cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -453,7 +453,7 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig
|
||||
return nil, nil, fmt.Errorf("failed to validate config: %w", err)
|
||||
}
|
||||
|
||||
cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath)
|
||||
cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -542,11 +542,30 @@ func buildRealtimeRoutingContext(a *application.Application, sessionID string) *
|
||||
}
|
||||
}
|
||||
|
||||
// loadPipelineSubModel loads a pipeline sub-model config by name and follows a
|
||||
// single alias hop, so a pipeline that references an alias (e.g. `llm: default`)
|
||||
// gets the alias target's full config (Backend, Model, ...) rather than the
|
||||
// alias stub with an empty Backend. Without this the alias survives unresolved
|
||||
// into model loading and fails downstream — notably in distributed mode with
|
||||
// "backend name is empty". Mirrors the top-level alias resolution in
|
||||
// core/http/middleware/request.go.
|
||||
func loadPipelineSubModel(cl *config.ModelConfigLoader, name, modelPath string) (*config.ModelConfig, error) {
|
||||
cfg, err := cl.LoadModelConfigFileByName(name, modelPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resolved, _, err := cl.ResolveAlias(cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return resolved, nil
|
||||
}
|
||||
|
||||
// returns and loads either a wrapped model or a model that support audio-to-audio
|
||||
func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, evaluator *templates.Evaluator, routing *RealtimeRoutingContext) (Model, error) {
|
||||
xlog.Debug("Creating new model pipeline model", "pipeline", pipeline)
|
||||
|
||||
cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath)
|
||||
cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -557,7 +576,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
||||
}
|
||||
|
||||
// TODO: Do we always need a transcription model? It can be disabled. Note that any-to-any instruction following models don't transcribe as such, so if transcription is required it is a separate process
|
||||
cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath)
|
||||
cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -589,7 +608,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
||||
xlog.Debug("Loading a wrapped model")
|
||||
|
||||
// Otherwise we want to return a wrapped model, which is a "virtual" model that re-uses other models to perform operations
|
||||
cfgLLM, err := cl.LoadModelConfigFileByName(pipeline.LLM, ml.ModelPath)
|
||||
cfgLLM, err := loadPipelineSubModel(cl, pipeline.LLM, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
@@ -604,7 +623,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
|
||||
applyPipelineReasoning(cfgLLM, *pipeline)
|
||||
applyPipelineThinking(cfgLLM, *pipeline)
|
||||
|
||||
cfgTTS, err := cl.LoadModelConfigFileByName(pipeline.TTS, ml.ModelPath)
|
||||
cfgTTS, err := loadPipelineSubModel(cl, pipeline.TTS, ml.ModelPath)
|
||||
if err != nil {
|
||||
|
||||
return nil, fmt.Errorf("failed to load backend config: %w", err)
|
||||
|
||||
52
core/http/endpoints/openai/realtime_model_alias_test.go
Normal file
52
core/http/endpoints/openai/realtime_model_alias_test.go
Normal file
@@ -0,0 +1,52 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
)
|
||||
|
||||
// loadPipelineSubModel must resolve a pipeline sub-model that references an
|
||||
// alias (e.g. `llm: default`) one hop to the alias target's full config — so
|
||||
// the effective backend is the target's backend, not the empty backend of the
|
||||
// alias stub. This mirrors the top-level alias resolution done in
|
||||
// core/http/middleware/request.go, which the realtime pipeline previously
|
||||
// skipped (failing in distributed mode with "backend name is empty").
|
||||
var _ = Describe("loadPipelineSubModel", func() {
|
||||
It("resolves a sub-model alias one hop to the target's config", func() {
|
||||
tmpDir := GinkgoT().TempDir()
|
||||
|
||||
// A real model config with a concrete backend.
|
||||
realLLM := `name: real-llm
|
||||
backend: llama-cpp
|
||||
parameters:
|
||||
model: real-llm.gguf
|
||||
`
|
||||
Expect(os.WriteFile(filepath.Join(tmpDir, "real-llm.yaml"), []byte(realLLM), 0644)).To(Succeed())
|
||||
|
||||
// An alias pointing at the real model.
|
||||
aliasCfg := `name: default
|
||||
alias: real-llm
|
||||
`
|
||||
Expect(os.WriteFile(filepath.Join(tmpDir, "default.yaml"), []byte(aliasCfg), 0644)).To(Succeed())
|
||||
|
||||
cl := config.NewModelConfigLoader(tmpDir)
|
||||
Expect(cl.LoadModelConfigsFromPath(tmpDir)).To(Succeed())
|
||||
|
||||
// Resolving the alias must follow the hop to the target's full config.
|
||||
resolved, err := loadPipelineSubModel(cl, "default", tmpDir)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(resolved.IsAlias()).To(BeFalse())
|
||||
Expect(resolved.Backend).To(Equal("llama-cpp"))
|
||||
|
||||
// A non-alias name must load unchanged.
|
||||
direct, err := loadPipelineSubModel(cl, "real-llm", tmpDir)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(direct.Backend).To(Equal("llama-cpp"))
|
||||
Expect(direct.Name).To(Equal("real-llm"))
|
||||
})
|
||||
})
|
||||
@@ -1,100 +0,0 @@
|
||||
import { test, expect } from './coverage-fixtures.js'
|
||||
|
||||
// These specs stub /api/features and /api/auth/status per cell. The test server
|
||||
// disables auth (isAdmin=true) and reports its own features, so we intercept
|
||||
// before navigation to simulate each role x mode cell.
|
||||
|
||||
function stubFeatures(page, features) {
|
||||
return page.route('**/api/features', route =>
|
||||
route.fulfill({ contentType: 'application/json', body: JSON.stringify(features) }))
|
||||
}
|
||||
|
||||
function stubNoP2P(page) {
|
||||
// P2P token endpoint returns empty -> p2pEnabled=false.
|
||||
return page.route('**/api/p2p/token', route =>
|
||||
route.fulfill({ contentType: 'text/plain', body: '' }))
|
||||
}
|
||||
|
||||
test.describe('Adaptive landing (HomeRoute)', () => {
|
||||
test('admin + distributed redirects /app to Nodes', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: true })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app')
|
||||
await expect(page).toHaveURL(/\/app\/nodes$/)
|
||||
await expect(page.locator('.page-title').first()).toBeVisible({ timeout: 15_000 })
|
||||
})
|
||||
|
||||
test('admin + single-node stays on Home', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: false })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app')
|
||||
await expect(page).toHaveURL(/\/app$/)
|
||||
await expect(page.locator('.home-greeting')).toBeVisible({ timeout: 15_000 })
|
||||
})
|
||||
})
|
||||
|
||||
test.describe('Adaptive sidebar', () => {
|
||||
test('distributed pins the Cluster group with Nodes at the top', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: true })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app/chat') // any in-app page so the sidebar is mounted
|
||||
const pinned = page.locator('.sidebar-nav .sidebar-section-items').first()
|
||||
await expect(pinned.getByText('Nodes', { exact: false })).toBeVisible({ timeout: 15_000 })
|
||||
})
|
||||
|
||||
test('single-node does not pin a Cluster group', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: false })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app/chat')
|
||||
// Nodes is reachable only via the Operate rail, not pinned at the top.
|
||||
await expect(page.locator('.sidebar-nav')).toBeVisible({ timeout: 15_000 })
|
||||
await expect(page.locator('.sidebar-nav .sidebar-section-items').first()
|
||||
.getByText('Nodes', { exact: false })).toHaveCount(0)
|
||||
})
|
||||
})
|
||||
|
||||
test.describe('Top navbar', () => {
|
||||
test('admin sees the mode pill and settings cog', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: true })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app/chat')
|
||||
await expect(page.locator('.top-navbar__mode')).toBeVisible({ timeout: 15_000 })
|
||||
await expect(page.locator('.top-navbar__icon[aria-label]')).not.toHaveCount(0)
|
||||
})
|
||||
|
||||
test('admin-via-chat jump shows when localai_assistant is enabled', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: false, localai_assistant: true })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app/chat')
|
||||
await expect(page.locator('.top-navbar__assistant')).toBeVisible({ timeout: 15_000 })
|
||||
})
|
||||
|
||||
test('admin-via-chat jump hidden when localai_assistant is off', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: false, localai_assistant: false })
|
||||
await stubNoP2P(page)
|
||||
await page.goto('/app/chat')
|
||||
await expect(page.locator('.top-navbar__assistant')).toHaveCount(0)
|
||||
})
|
||||
})
|
||||
|
||||
test.describe('Token usage meter', () => {
|
||||
test('renders when admin usage has data', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: false })
|
||||
await stubNoP2P(page)
|
||||
await page.route('**/api/auth/admin/usage**', route =>
|
||||
route.fulfill({ contentType: 'application/json',
|
||||
body: JSON.stringify({ buckets: [{ total_tokens: 1234 }] }) }))
|
||||
await page.goto('/app/chat')
|
||||
await expect(page.locator('.top-navbar__meter')).toBeVisible({ timeout: 15_000 })
|
||||
})
|
||||
|
||||
test('hidden when admin usage is empty (graceful degrade)', async ({ page }) => {
|
||||
await stubFeatures(page, { distributed: false })
|
||||
await stubNoP2P(page)
|
||||
await page.route('**/api/auth/admin/usage**', route =>
|
||||
route.fulfill({ contentType: 'application/json', body: JSON.stringify({ buckets: [] }) }))
|
||||
await page.goto('/app/chat')
|
||||
await expect(page.locator('.top-navbar')).toBeVisible({ timeout: 15_000 })
|
||||
await expect(page.locator('.top-navbar__meter')).toHaveCount(0)
|
||||
})
|
||||
})
|
||||
@@ -12,16 +12,6 @@
|
||||
"accountSettings": "Account settings",
|
||||
"account": "Account",
|
||||
"accountFor": "Account: {{name}}",
|
||||
"topbar": {
|
||||
"label": "Top bar",
|
||||
"modeDistributed": "Distributed",
|
||||
"modeSwarm": "Swarm",
|
||||
"modeSingle": "Single-node",
|
||||
"pickModel": "Models",
|
||||
"adminViaChat": "Admin via chat",
|
||||
"tokensToday": "Tokens today",
|
||||
"usageDetail": "View usage detail"
|
||||
},
|
||||
"sections": {
|
||||
"create": "Create",
|
||||
"recognition": "Recognition",
|
||||
|
||||
@@ -45,7 +45,7 @@
|
||||
},
|
||||
"scheduling": {
|
||||
"title": "Penjadwalan",
|
||||
"subtitle": "Aturan penempatan model dan replika di seluruh klaster"
|
||||
"subtitle": "Aturan penempatan model dan replika di seluruh kluster"
|
||||
},
|
||||
"p2p": {
|
||||
"title": "Komputasi AI Terdistribusi",
|
||||
@@ -86,4 +86,4 @@
|
||||
"title": "Penjelajah",
|
||||
"subtitle": "Jelajahi file dan konfigurasi"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,7 +72,7 @@
|
||||
"actions": {
|
||||
"copy": "Salin",
|
||||
"regenerate": "Hasilkan ulang",
|
||||
"jumpToLatest": "Jump to latest"
|
||||
"jumpToLatest": "Lompat ke terbaru"
|
||||
},
|
||||
"streaming": {
|
||||
"transferring": "Mentransfer model...",
|
||||
@@ -115,4 +115,4 @@
|
||||
"clearAll": "Hapus semua",
|
||||
"deleteAllTitle": "Hapus semua percakapan"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
{
|
||||
"unsaved": {
|
||||
"title": "Discard unsaved changes?",
|
||||
"message": "You have unsaved changes that will be lost if you leave this page.",
|
||||
"leave": "Leave"
|
||||
"title": "Buang perubahan yang belum disimpan?",
|
||||
"message": "Anda memiliki perubahan yang belum disimpan. Perubahan tersebut akan hilang jika Anda meninggalkan halaman ini.",
|
||||
"leave": "Tinggalkan Halaman"
|
||||
},
|
||||
"actions": {
|
||||
"save": "Simpan",
|
||||
|
||||
@@ -7,15 +7,15 @@
|
||||
"resourceGpu": "GPU",
|
||||
"resourceRam": "RAM",
|
||||
"greeting": {
|
||||
"morning": "Good morning",
|
||||
"afternoon": "Good afternoon",
|
||||
"evening": "Good evening",
|
||||
"night": "Working late"
|
||||
"morning": "Selamat pagi",
|
||||
"afternoon": "Selamat siang",
|
||||
"evening": "Selamat malam",
|
||||
"night": "Selamat lembur"
|
||||
},
|
||||
"statusLine": {
|
||||
"modelsLoaded_one": "{{count}} model loaded",
|
||||
"modelsLoaded_other": "{{count}} models loaded",
|
||||
"noModelsLoaded": "No models loaded",
|
||||
"modelsLoaded_one": "{{count}} model dimuat",
|
||||
"modelsLoaded_other": "{{count}} model dimuat",
|
||||
"noModelsLoaded": "Tidak ada model yang dimuat",
|
||||
"nodes_one": "{{count}} node",
|
||||
"nodes_other": "{{count}} nodes"
|
||||
},
|
||||
@@ -79,14 +79,14 @@
|
||||
},
|
||||
"connect": {
|
||||
"title": "Satu endpoint, semua API",
|
||||
"subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Di atas itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.",
|
||||
"subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Selain itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.",
|
||||
"nativeTitle": "API native",
|
||||
"compatTitle": "Kompatibilitas drop-in",
|
||||
"apiReference": "Referensi API lengkap",
|
||||
"copy": "Salin",
|
||||
"copied": "Disalin",
|
||||
"browse": "Browse the API",
|
||||
"hide": "Hide endpoints",
|
||||
"dismiss": "Dismiss"
|
||||
"browse": "Jelajahi API",
|
||||
"hide": "Sembunyikan endpoint",
|
||||
"dismiss": "Abaikan"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
"video": "Video",
|
||||
"tts": "TTS",
|
||||
"sound": "Suara",
|
||||
"transform": "Transform"
|
||||
"transform": "Transformasi"
|
||||
}
|
||||
},
|
||||
"image": {
|
||||
@@ -30,7 +30,7 @@
|
||||
"refImagesAdded_other": "{{count}} gambar ditambahkan"
|
||||
},
|
||||
"actions": {
|
||||
"view": "View",
|
||||
"view": "Lihat",
|
||||
"generate": "Hasilkan",
|
||||
"generating": "Menghasilkan..."
|
||||
},
|
||||
@@ -153,4 +153,4 @@
|
||||
"clearConfirm": "Hapus",
|
||||
"cleared": "Riwayat dihapus"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,11 +19,11 @@
|
||||
"operate": "Operasikan"
|
||||
},
|
||||
"operate": {
|
||||
"inference": "Inference",
|
||||
"cluster": "Cluster",
|
||||
"observability": "Observability",
|
||||
"access": "Access",
|
||||
"system": "System"
|
||||
"inference": "Inferensi",
|
||||
"cluster": "Kluster",
|
||||
"observability": "Observabilitas",
|
||||
"access": "Akses",
|
||||
"system": "Sistem"
|
||||
},
|
||||
"items": {
|
||||
"home": "Beranda",
|
||||
@@ -64,7 +64,7 @@
|
||||
"copyright": "© 2023-{{year}} {{author}}"
|
||||
},
|
||||
"console": {
|
||||
"automation": "Otomasi",
|
||||
"automation": "Automasi",
|
||||
"training": "Pelatihan"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -184,50 +184,6 @@
|
||||
font-size: 1.5rem;
|
||||
}
|
||||
|
||||
/* Desktop top bar: deployment + admin affordances on wide screens. Hidden on
|
||||
mobile, where .mobile-header carries the equivalent actions. */
|
||||
.top-navbar {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: var(--spacing-md);
|
||||
padding: var(--spacing-sm) var(--spacing-lg);
|
||||
border-bottom: 1px solid var(--color-border-default);
|
||||
background: var(--color-bg-secondary);
|
||||
}
|
||||
.top-navbar__right { display: flex; align-items: center; gap: var(--spacing-sm); }
|
||||
.top-navbar__mode {
|
||||
font-size: 0.75rem;
|
||||
padding: 2px 10px;
|
||||
border-radius: 999px;
|
||||
border: 1px solid var(--color-border-default);
|
||||
color: var(--color-text-secondary);
|
||||
}
|
||||
.top-navbar__mode.is-active { color: var(--color-success); border-color: var(--color-success); }
|
||||
.top-navbar__btn {
|
||||
display: inline-flex; align-items: center; gap: 6px;
|
||||
font-size: 0.8125rem; padding: 5px 10px; border-radius: 8px;
|
||||
border: 1px solid var(--color-border-default); background: var(--color-bg-tertiary);
|
||||
color: var(--color-text-primary); cursor: pointer;
|
||||
}
|
||||
.top-navbar__icon {
|
||||
width: 32px; height: 32px; display: inline-flex; align-items: center;
|
||||
justify-content: center; border-radius: 8px; border: 1px solid var(--color-border-default);
|
||||
background: var(--color-bg-tertiary); color: var(--color-text-secondary); cursor: pointer;
|
||||
}
|
||||
.top-navbar__avatar img { width: 100%; height: 100%; border-radius: 50%; object-fit: cover; }
|
||||
.top-navbar__meter {
|
||||
display: inline-flex; flex-direction: column; gap: 3px; align-items: flex-start;
|
||||
padding: 4px 10px; border-radius: 8px; border: 1px solid var(--color-border-default);
|
||||
background: var(--color-bg-tertiary); cursor: pointer; min-width: 150px;
|
||||
}
|
||||
.top-navbar__meter-label { font-size: 0.6875rem; color: var(--color-text-secondary); }
|
||||
.top-navbar__meter-bar { width: 100%; height: 5px; border-radius: 3px; background: var(--color-bg-secondary); overflow: hidden; }
|
||||
.top-navbar__meter-bar i { display: block; height: 100%; background: var(--color-primary); }
|
||||
@media (max-width: 639px) {
|
||||
.top-navbar { display: none; }
|
||||
}
|
||||
|
||||
/* Sidebar */
|
||||
.sidebar {
|
||||
position: fixed;
|
||||
|
||||
@@ -3,7 +3,6 @@ import { Outlet, useLocation, useNavigate } from 'react-router-dom'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import Sidebar from './components/Sidebar'
|
||||
import OperationsBar from './components/OperationsBar'
|
||||
import TopNavbar from './components/TopNavbar'
|
||||
import { ToastContainer, useToast } from './components/Toast'
|
||||
import { systemApi } from './utils/api'
|
||||
import { useTheme } from './contexts/ThemeContext'
|
||||
@@ -99,7 +98,6 @@ export default function App() {
|
||||
<Sidebar isOpen={sidebarOpen} onClose={() => setSidebarOpen(false)} />
|
||||
<main className="main-content" {...(sidebarOpen ? { 'aria-hidden': 'true', inert: '' } : {})}>
|
||||
<OperationsBar />
|
||||
<TopNavbar />
|
||||
{/* Mobile header — primary actions reachable without opening the
|
||||
drawer. Hamburger is the only way to expand the nav on phones;
|
||||
theme toggle and account avatar are mirrored from the sidebar
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
import { lazy, Suspense } from 'react'
|
||||
import { Navigate } from 'react-router-dom'
|
||||
import { useAuth } from '../context/AuthContext'
|
||||
import { useDeployment } from '../contexts/DeploymentContext'
|
||||
import { resolveHome } from '../utils/resolveHome'
|
||||
import RouteFallback from './RouteFallback'
|
||||
|
||||
const Home = lazy(() => import('../pages/Home'))
|
||||
|
||||
// Index-route element. Waits for auth + deployment signals to load (so we never
|
||||
// flash the wrong landing), then either renders Home or redirects to the cell's
|
||||
// landing page. Redirecting (rather than rendering Nodes/Chat inline at /app)
|
||||
// keeps each target's own route guard, active-nav state, and deep-linkability.
|
||||
export default function HomeRoute() {
|
||||
const { isAdmin, loading: authLoading } = useAuth()
|
||||
const { distributed, p2pEnabled, loading: deployLoading } = useDeployment()
|
||||
|
||||
if (authLoading || deployLoading) return <RouteFallback />
|
||||
|
||||
const target = resolveHome({ isAdmin, distributed, p2pEnabled })
|
||||
if (target) return <Navigate to={target} replace />
|
||||
|
||||
return (
|
||||
<Suspense fallback={<RouteFallback />}>
|
||||
<Home />
|
||||
</Suspense>
|
||||
)
|
||||
}
|
||||
@@ -5,11 +5,9 @@ import ThemeToggle from './ThemeToggle'
|
||||
import LanguageSwitcher from './LanguageSwitcher'
|
||||
import { useAuth } from '../context/AuthContext'
|
||||
import { useBranding } from '../contexts/BrandingContext'
|
||||
import { useDeployment } from '../contexts/DeploymentContext'
|
||||
import { apiUrl } from '../utils/basePath'
|
||||
import { preloadRoute } from '../router'
|
||||
import { consoles, firstVisiblePath, consolePaths } from './console/consoleConfig'
|
||||
import { clusterPinItems, shouldCollapseCreate } from '../utils/sidebarPolicy'
|
||||
|
||||
const COLLAPSED_KEY = 'localai_sidebar_collapsed'
|
||||
const SECTIONS_KEY = 'localai_sidebar_sections'
|
||||
@@ -60,13 +58,11 @@ function NavItem({ item, onClose, collapsed }) {
|
||||
)
|
||||
}
|
||||
|
||||
function loadSectionState(collapseCreate = false) {
|
||||
// Tiers render expanded by default; users can collapse any tier and the
|
||||
// choice persists (stored values override defaults). In cluster cells we
|
||||
// start Create collapsed so the pinned cluster group leads - but only when
|
||||
// the user has not already expressed a preference.
|
||||
function loadSectionState() {
|
||||
// Tiers render expanded by default (the redesign favours showing the few
|
||||
// intent groups up front); users can still collapse any tier and the choice
|
||||
// is persisted. Stored values override the defaults so a saved collapse wins.
|
||||
const defaults = Object.fromEntries(sections.map(s => [s.id, true]))
|
||||
if (collapseCreate) defaults.create = false
|
||||
try {
|
||||
const stored = localStorage.getItem(SECTIONS_KEY)
|
||||
return stored ? { ...defaults, ...JSON.parse(stored) } : defaults
|
||||
@@ -81,34 +77,20 @@ function saveSectionState(state) {
|
||||
|
||||
export default function Sidebar({ isOpen, onClose }) {
|
||||
const { t } = useTranslation('nav')
|
||||
const { isAdmin, authEnabled, user, logout, hasFeature } = useAuth()
|
||||
// Deployment shape (server features + p2p) drives the adaptive sidebar; the
|
||||
// shared context replaces the sidebar's own /api/features fetch so the
|
||||
// landing resolver, navbar, and this policy agree on one snapshot.
|
||||
const deployment = useDeployment()
|
||||
const features = deployment.features
|
||||
// Shared shape for the console gating helpers (consoleConfig.js); in scope for
|
||||
// both the pinned cluster group and the console-tier rendering below.
|
||||
const auth = { isAdmin, authEnabled, hasFeature, features }
|
||||
const collapseCreate = shouldCollapseCreate(auth, deployment)
|
||||
const [features, setFeatures] = useState({})
|
||||
const [collapsed, setCollapsed] = useState(() => {
|
||||
try { return localStorage.getItem(COLLAPSED_KEY) === 'true' } catch (_) { return false }
|
||||
})
|
||||
const [openSections, setOpenSections] = useState(loadSectionState)
|
||||
const { isAdmin, authEnabled, user, logout, hasFeature } = useAuth()
|
||||
const branding = useBranding()
|
||||
const navigate = useNavigate()
|
||||
const location = useLocation()
|
||||
const closeBtnRef = useRef(null)
|
||||
|
||||
// Apply the cluster-cell Create-collapse default once, only when the user has
|
||||
// no stored section preference (so we never override an explicit choice).
|
||||
useEffect(() => {
|
||||
if (deployment.loading) return
|
||||
let hasStored = false
|
||||
try { hasStored = !!localStorage.getItem(SECTIONS_KEY) } catch { hasStored = false }
|
||||
if (hasStored || !collapseCreate) return
|
||||
setOpenSections(prev => (prev.create === false ? prev : { ...prev, create: false }))
|
||||
}, [deployment.loading, collapseCreate])
|
||||
fetch(apiUrl('/api/features')).then(r => r.json()).then(setFeatures).catch(() => {})
|
||||
}, [])
|
||||
|
||||
// Stay in sync with external collapse dispatches (e.g. the chat
|
||||
// page's focus mode). The collapse-toggle button still owns the
|
||||
@@ -175,6 +157,8 @@ export default function Sidebar({ isOpen, onClose }) {
|
||||
}
|
||||
|
||||
const visibleTopItems = topItems.filter(filterItem)
|
||||
// Shared shape for the console gating helpers (consoleConfig.js).
|
||||
const auth = { isAdmin, authEnabled, hasFeature, features }
|
||||
|
||||
// Inline sections (Create) carry no gating; a plain filterItem pass suffices.
|
||||
const getVisibleSectionItems = (section) => section.items.filter(filterItem)
|
||||
@@ -215,28 +199,6 @@ export default function Sidebar({ isOpen, onClose }) {
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Pinned Cluster quick-access (admin + distributed/p2p). Same gate
|
||||
as the Operate rail; surfaced at the top for cluster operators. */}
|
||||
{(() => {
|
||||
const pinned = clusterPinItems(auth, deployment)
|
||||
if (pinned.length === 0) return null
|
||||
return (
|
||||
<div className="sidebar-section">
|
||||
<div className="sidebar-section-title">{t('operate.cluster')}</div>
|
||||
<div className="sidebar-section-items">
|
||||
{pinned.map(item => (
|
||||
<NavItem
|
||||
key={item.path}
|
||||
item={{ path: item.path, icon: item.icon, labelKey: item.labelKey }}
|
||||
onClose={onClose}
|
||||
collapsed={collapsed}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
})()}
|
||||
|
||||
{/* Collapsible sections */}
|
||||
{sections.map(section => {
|
||||
const visibleItems = getVisibleSectionItems(section)
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
import { useNavigate } from 'react-router-dom'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useAuth } from '../context/AuthContext'
|
||||
import { useDeployment } from '../contexts/DeploymentContext'
|
||||
import { useTheme } from '../contexts/ThemeContext'
|
||||
import { launchAssistantChat } from '../utils/launchAssistantChat'
|
||||
import TokenUsageMeter from './navbar/TokenUsageMeter'
|
||||
|
||||
// Desktop top bar. Complementary to the mobile-only header in App.jsx: this is
|
||||
// hidden on small screens (see .top-navbar CSS) and shows deployment/admin
|
||||
// affordances on wide screens where the sidebar footer is far from the content.
|
||||
export default function TopNavbar() {
|
||||
const { t } = useTranslation('nav')
|
||||
const navigate = useNavigate()
|
||||
const { isAdmin, authEnabled, user } = useAuth()
|
||||
const { features, distributed, p2pEnabled } = useDeployment()
|
||||
const { theme, toggleTheme } = useTheme()
|
||||
|
||||
const modeLabel = distributed
|
||||
? t('topbar.modeDistributed')
|
||||
: p2pEnabled
|
||||
? t('topbar.modeSwarm')
|
||||
: t('topbar.modeSingle')
|
||||
|
||||
const showAssistantJump = isAdmin && !!features.localai_assistant
|
||||
const showAvatar = authEnabled && user
|
||||
const themeLabel = theme === 'dark' ? t('switchToLightMode') : t('switchToDarkMode')
|
||||
|
||||
return (
|
||||
<div className="top-navbar" role="navigation" aria-label={t('topbar.label')}>
|
||||
<div className="top-navbar__left">
|
||||
{isAdmin && (
|
||||
<span className={`top-navbar__mode ${distributed || p2pEnabled ? 'is-active' : ''}`}>
|
||||
<i className="fas fa-circle-nodes" aria-hidden="true" /> {modeLabel}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="top-navbar__right">
|
||||
{!isAdmin && (
|
||||
<button
|
||||
type="button"
|
||||
className="top-navbar__btn"
|
||||
onClick={() => navigate('/app/chat')}
|
||||
title={t('topbar.pickModel')}
|
||||
>
|
||||
<i className="fas fa-cube" aria-hidden="true" /> {t('topbar.pickModel')}
|
||||
</button>
|
||||
)}
|
||||
{showAssistantJump && (
|
||||
<button
|
||||
type="button"
|
||||
className="top-navbar__btn top-navbar__assistant"
|
||||
onClick={() => launchAssistantChat(navigate)}
|
||||
title={t('topbar.adminViaChat')}
|
||||
>
|
||||
<i className="fas fa-user-shield" aria-hidden="true" /> {t('topbar.adminViaChat')}
|
||||
</button>
|
||||
)}
|
||||
{isAdmin && <TokenUsageMeter />}
|
||||
{isAdmin && (
|
||||
<button
|
||||
type="button"
|
||||
className="top-navbar__icon"
|
||||
onClick={() => navigate('/app/settings')}
|
||||
aria-label={t('items.settings')}
|
||||
title={t('items.settings')}
|
||||
>
|
||||
<i className="fas fa-cog" aria-hidden="true" />
|
||||
</button>
|
||||
)}
|
||||
<button
|
||||
type="button"
|
||||
className="top-navbar__icon"
|
||||
onClick={toggleTheme}
|
||||
aria-label={themeLabel}
|
||||
title={themeLabel}
|
||||
>
|
||||
<i className={`fas ${theme === 'dark' ? 'fa-sun' : 'fa-moon'}`} aria-hidden="true" />
|
||||
</button>
|
||||
{showAvatar && (
|
||||
<button
|
||||
type="button"
|
||||
className="top-navbar__icon top-navbar__avatar"
|
||||
onClick={() => navigate('/app/account')}
|
||||
aria-label={user.name || user.email}
|
||||
title={user.name || user.email}
|
||||
>
|
||||
{user.avatarUrl
|
||||
? <img src={user.avatarUrl} alt="" />
|
||||
: <i className="fas fa-user-circle" aria-hidden="true" />}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
import { useState, useEffect } from 'react'
|
||||
import { useNavigate } from 'react-router-dom'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { usageApi } from '../../utils/api'
|
||||
|
||||
// Compact admin-only usage glance: today's total tokens, optionally against a
|
||||
// quota cap, linking to the full /app/usage page. Self-contained data fetch so
|
||||
// a usage-API failure cannot break the navbar - it just renders nothing.
|
||||
function sumTotalTokens(res) {
|
||||
const buckets = res?.buckets || res?.usage || (Array.isArray(res) ? res : [])
|
||||
if (!Array.isArray(buckets) || buckets.length === 0) return null
|
||||
return buckets.reduce((s, b) => s + (b.total_tokens || 0), 0)
|
||||
}
|
||||
|
||||
export default function TokenUsageMeter() {
|
||||
const { t } = useTranslation('nav')
|
||||
const navigate = useNavigate()
|
||||
const [tokens, setTokens] = useState(null)
|
||||
const [cap, setCap] = useState(null)
|
||||
|
||||
useEffect(() => {
|
||||
let cancelled = false
|
||||
usageApi.getAdminUsage('day')
|
||||
.then(res => { if (!cancelled) setTokens(sumTotalTokens(res)) })
|
||||
.catch(() => { if (!cancelled) setTokens(null) })
|
||||
usageApi.getMyQuotas()
|
||||
.then(q => { if (!cancelled) setCap(q?.token_limit || q?.tokens?.limit || null) })
|
||||
.catch(() => { if (!cancelled) setCap(null) })
|
||||
return () => { cancelled = true }
|
||||
}, [])
|
||||
|
||||
if (tokens === null) return null
|
||||
|
||||
const pct = cap ? Math.min(100, Math.round((tokens / cap) * 100)) : null
|
||||
|
||||
return (
|
||||
<button
|
||||
type="button"
|
||||
className="top-navbar__meter"
|
||||
onClick={() => navigate('/app/usage')}
|
||||
title={t('topbar.usageDetail')}
|
||||
>
|
||||
<span className="top-navbar__meter-label">
|
||||
{t('topbar.tokensToday')}: {Intl.NumberFormat().format(tokens)}
|
||||
{cap ? ` / ${Intl.NumberFormat().format(cap)}` : ''}
|
||||
</span>
|
||||
{pct !== null && (
|
||||
<span className="top-navbar__meter-bar"><i style={{ width: `${pct}%` }} /></span>
|
||||
)}
|
||||
</button>
|
||||
)
|
||||
}
|
||||
@@ -1,55 +0,0 @@
|
||||
import { createContext, useContext, useState, useEffect } from 'react'
|
||||
import { apiUrl } from '../utils/basePath'
|
||||
import { p2pApi } from '../utils/api'
|
||||
|
||||
const DeploymentContext = createContext(null)
|
||||
|
||||
// One shared fetch of the deployment-shape signals the adaptive UI keys off:
|
||||
// server features (/api/features) and whether a P2P network token exists.
|
||||
// Components used to fetch /api/features independently (Sidebar, Home); this
|
||||
// centralises it so the landing resolver, sidebar policy, and navbar agree on
|
||||
// one snapshot and we issue a single request.
|
||||
export function DeploymentProvider({ children }) {
|
||||
const [features, setFeatures] = useState({})
|
||||
const [p2pEnabled, setP2pEnabled] = useState(false)
|
||||
const [loading, setLoading] = useState(true)
|
||||
|
||||
useEffect(() => {
|
||||
let cancelled = false
|
||||
const featuresP = fetch(apiUrl('/api/features'))
|
||||
.then(r => r.json())
|
||||
.catch(() => ({}))
|
||||
// P2P has no /api/features flag: it is "enabled" when a network token
|
||||
// exists (mirrors pages/P2P.jsx). A 404/disabled endpoint throws and we
|
||||
// treat that as not-enabled.
|
||||
const p2pP = p2pApi.getToken()
|
||||
.then(tok => (typeof tok === 'string' ? tok : (tok?.token || '')).trim())
|
||||
.catch(() => '')
|
||||
Promise.all([featuresP, p2pP]).then(([f, tok]) => {
|
||||
if (cancelled) return
|
||||
setFeatures(f || {})
|
||||
setP2pEnabled(!!tok)
|
||||
setLoading(false)
|
||||
})
|
||||
return () => { cancelled = true }
|
||||
}, [])
|
||||
|
||||
const value = {
|
||||
features,
|
||||
distributed: !!features.distributed,
|
||||
p2pEnabled,
|
||||
loading,
|
||||
}
|
||||
|
||||
return (
|
||||
<DeploymentContext.Provider value={value}>
|
||||
{children}
|
||||
</DeploymentContext.Provider>
|
||||
)
|
||||
}
|
||||
|
||||
export function useDeployment() {
|
||||
const ctx = useContext(DeploymentContext)
|
||||
if (!ctx) throw new Error('useDeployment must be used within DeploymentProvider')
|
||||
return ctx
|
||||
}
|
||||
@@ -4,7 +4,6 @@ import { RouterProvider } from 'react-router-dom'
|
||||
import { ThemeProvider } from './contexts/ThemeContext'
|
||||
import { BrandingProvider } from './contexts/BrandingContext'
|
||||
import { AuthProvider } from './context/AuthContext'
|
||||
import { DeploymentProvider } from './contexts/DeploymentContext'
|
||||
import { OperationsProvider } from './contexts/OperationsContext'
|
||||
import { router } from './router'
|
||||
import './i18n'
|
||||
@@ -33,11 +32,9 @@ createRoot(document.getElementById('root')).render(
|
||||
<ThemeProvider>
|
||||
<BrandingProvider>
|
||||
<AuthProvider>
|
||||
<DeploymentProvider>
|
||||
<OperationsProvider>
|
||||
<RouterProvider router={router} />
|
||||
</OperationsProvider>
|
||||
</DeploymentProvider>
|
||||
<OperationsProvider>
|
||||
<RouterProvider router={router} />
|
||||
</OperationsProvider>
|
||||
</AuthProvider>
|
||||
</BrandingProvider>
|
||||
</ThemeProvider>
|
||||
|
||||
@@ -541,73 +541,58 @@ export default function Chat() {
|
||||
updateChatSettings(activeChat.id, { clientMCPServers: next })
|
||||
}, [activeChat, updateChatSettings])
|
||||
|
||||
// Load initial message / assistant launch from the Home page or the navbar
|
||||
// quick-jump. Factored into a callback so both the mount-time reader and the
|
||||
// navbar re-trigger event below consume the same payload through one path.
|
||||
// Load initial message from home page
|
||||
const homeDataProcessed = useRef(false)
|
||||
const consumeHomeChatData = useCallback(() => {
|
||||
const stored = localStorage.getItem('localai_index_chat_data')
|
||||
if (!stored) return
|
||||
try {
|
||||
const data = JSON.parse(stored)
|
||||
localStorage.removeItem('localai_index_chat_data')
|
||||
|
||||
// Two entry shapes from Home:
|
||||
// - "compose-and-send": data.message present → open new chat,
|
||||
// prefill the composer, click submit.
|
||||
// - "open-assistant": no message, just data.localaiAssistant → open
|
||||
// a fresh chat already in admin mode so the wizard can fire.
|
||||
const hasMessage = !!data.message
|
||||
const wantsAssistant = !!data.localaiAssistant
|
||||
|
||||
if (hasMessage || wantsAssistant) {
|
||||
let targetChat = activeChat
|
||||
if (data.newChat) {
|
||||
targetChat = addChat(data.model || '', '', data.mcpMode || false)
|
||||
} else {
|
||||
if (data.model && activeChat) {
|
||||
updateChatSettings(activeChat.id, { model: data.model })
|
||||
}
|
||||
if (data.mcpMode && activeChat) {
|
||||
updateChatSettings(activeChat.id, { mcpMode: true })
|
||||
}
|
||||
}
|
||||
if (data.mcpServers?.length > 0 && targetChat) {
|
||||
updateChatSettings(targetChat.id, { mcpServers: data.mcpServers })
|
||||
}
|
||||
if (data.clientMCPServers?.length > 0 && targetChat) {
|
||||
updateChatSettings(targetChat.id, { clientMCPServers: data.clientMCPServers })
|
||||
}
|
||||
if (wantsAssistant && targetChat) {
|
||||
updateChatSettings(targetChat.id, { localaiAssistant: true })
|
||||
}
|
||||
if (hasMessage) {
|
||||
setInput(data.message)
|
||||
if (data.files) setFiles(data.files)
|
||||
setTimeout(() => {
|
||||
const submitBtn = document.getElementById('chat-submit-btn')
|
||||
submitBtn?.click()
|
||||
}, 100)
|
||||
}
|
||||
}
|
||||
} catch (_e) { /* ignore */ }
|
||||
}, [activeChat, addChat, updateChatSettings])
|
||||
|
||||
useEffect(() => {
|
||||
if (homeDataProcessed.current) return
|
||||
homeDataProcessed.current = true
|
||||
consumeHomeChatData()
|
||||
}, [consumeHomeChatData])
|
||||
const stored = localStorage.getItem('localai_index_chat_data')
|
||||
if (stored) {
|
||||
homeDataProcessed.current = true
|
||||
try {
|
||||
const data = JSON.parse(stored)
|
||||
localStorage.removeItem('localai_index_chat_data')
|
||||
|
||||
// Admins can re-trigger the assistant jump from the navbar while already on
|
||||
// the chat page; navigate('/app/chat') does not remount Chat, so the
|
||||
// mount-time reader above never fires. The launcher dispatches this event
|
||||
// after writing the payload so we re-consume it and open a fresh assistant.
|
||||
useEffect(() => {
|
||||
const onOpenAssistant = () => consumeHomeChatData()
|
||||
window.addEventListener('localai-open-assistant', onOpenAssistant)
|
||||
return () => window.removeEventListener('localai-open-assistant', onOpenAssistant)
|
||||
}, [consumeHomeChatData])
|
||||
// Two entry shapes from Home:
|
||||
// - "compose-and-send": data.message present → open new chat,
|
||||
// prefill the composer, click submit.
|
||||
// - "open-assistant": no message, just data.localaiAssistant → open
|
||||
// a fresh chat already in admin mode so the wizard can fire.
|
||||
const hasMessage = !!data.message
|
||||
const wantsAssistant = !!data.localaiAssistant
|
||||
|
||||
if (hasMessage || wantsAssistant) {
|
||||
let targetChat = activeChat
|
||||
if (data.newChat) {
|
||||
targetChat = addChat(data.model || '', '', data.mcpMode || false)
|
||||
} else {
|
||||
if (data.model && activeChat) {
|
||||
updateChatSettings(activeChat.id, { model: data.model })
|
||||
}
|
||||
if (data.mcpMode && activeChat) {
|
||||
updateChatSettings(activeChat.id, { mcpMode: true })
|
||||
}
|
||||
}
|
||||
if (data.mcpServers?.length > 0 && targetChat) {
|
||||
updateChatSettings(targetChat.id, { mcpServers: data.mcpServers })
|
||||
}
|
||||
if (data.clientMCPServers?.length > 0 && targetChat) {
|
||||
updateChatSettings(targetChat.id, { clientMCPServers: data.clientMCPServers })
|
||||
}
|
||||
if (wantsAssistant && targetChat) {
|
||||
updateChatSettings(targetChat.id, { localaiAssistant: true })
|
||||
}
|
||||
if (hasMessage) {
|
||||
setInput(data.message)
|
||||
if (data.files) setFiles(data.files)
|
||||
setTimeout(() => {
|
||||
const submitBtn = document.getElementById('chat-submit-btn')
|
||||
submitBtn?.click()
|
||||
}, 100)
|
||||
}
|
||||
}
|
||||
} catch (_e) { /* ignore */ }
|
||||
}
|
||||
}, [])
|
||||
|
||||
// Track whether the user is pinned to the bottom. If they scroll up
|
||||
// while a response is streaming, stop forcing them back down.
|
||||
|
||||
@@ -13,7 +13,6 @@ import { useResources } from '../hooks/useResources'
|
||||
import { fileToBase64, backendControlApi, systemApi, modelsApi, mcpApi, nodesApi } from '../utils/api'
|
||||
import { API_CONFIG } from '../utils/config'
|
||||
import { greetingKey } from '../utils/greeting'
|
||||
import { launchAssistantChat } from '../utils/launchAssistantChat'
|
||||
import StatusPill from '../components/StatusPill'
|
||||
import Skeleton from '../components/Skeleton'
|
||||
import SectionHeading from '../components/SectionHeading'
|
||||
@@ -229,8 +228,16 @@ export default function Home() {
|
||||
// requiring an initial message or model selection. Useful when an admin
|
||||
// wants to start the assistant from a cold home page.
|
||||
const openAssistantChat = useCallback(() => {
|
||||
launchAssistantChat(navigate, selectedModel)
|
||||
const chatData = {
|
||||
model: selectedModel || '',
|
||||
mcpMode: false,
|
||||
localaiAssistant: true,
|
||||
newChat: true,
|
||||
}
|
||||
localStorage.setItem('localai_index_chat_data', JSON.stringify(chatData))
|
||||
try { localStorage.setItem('localai_assistant_used', '1') } catch { /* ignore */ }
|
||||
setAssistantUsed(true)
|
||||
navigate('/app/chat')
|
||||
}, [navigate, selectedModel])
|
||||
|
||||
const handleSubmit = (e) => {
|
||||
|
||||
@@ -6,7 +6,6 @@ import RequireAdmin from './components/RequireAdmin'
|
||||
import RequireAuth from './components/RequireAuth'
|
||||
import RequireAuthEnabled from './components/RequireAuthEnabled'
|
||||
import RequireFeature from './components/RequireFeature'
|
||||
import HomeRoute from './components/HomeRoute'
|
||||
|
||||
// Pages are code-split: each becomes its own chunk loaded on demand, so a route
|
||||
// no longer drags every other page (and its heavy deps — CodeMirror, the MCP
|
||||
@@ -33,7 +32,7 @@ export function preloadRoute(path) {
|
||||
preloaders[m[1] ?? '']?.().catch(() => { /* network blip — real click will retry */ })
|
||||
}
|
||||
|
||||
page('', () => import('./pages/Home'))
|
||||
const Home = page('', () => import('./pages/Home'))
|
||||
const Chat = page('chat', () => import('./pages/Chat'))
|
||||
const Models = page('models', () => import('./pages/Models'))
|
||||
const Manage = page('manage', () => import('./pages/Manage'))
|
||||
@@ -97,7 +96,7 @@ function Feature({ feature, children }) {
|
||||
}
|
||||
|
||||
const appChildren = [
|
||||
{ index: true, element: <HomeRoute /> },
|
||||
{ index: true, element: <Home /> },
|
||||
{ path: 'chat', element: <Chat /> },
|
||||
{ path: 'chat/:model', element: <Chat /> },
|
||||
{ path: 'image', element: <ImageGen /> },
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
// Opens a fresh chat already in LocalAI Assistant ("manage") mode. Chat.jsx
|
||||
// reads localai_index_chat_data on mount and enables localaiAssistant for the
|
||||
// new chat. Shared by the Home CTA and the top navbar quick-jump so there is
|
||||
// one definition of how the assistant is launched.
|
||||
export function launchAssistantChat(navigate, model = '') {
|
||||
const chatData = {
|
||||
model: model || '',
|
||||
mcpMode: false,
|
||||
localaiAssistant: true,
|
||||
newChat: true,
|
||||
}
|
||||
try { localStorage.setItem('localai_index_chat_data', JSON.stringify(chatData)) } catch { /* ignore */ }
|
||||
try { localStorage.setItem('localai_assistant_used', '1') } catch { /* ignore */ }
|
||||
navigate('/app/chat')
|
||||
// When already on /app/chat, navigate() does not remount Chat, so its
|
||||
// mount-time reader would never see the payload above. Signal the mounted
|
||||
// Chat to re-consume it; harmless elsewhere since Chat reads on mount anyway.
|
||||
try { window.dispatchEvent(new CustomEvent('localai-open-assistant')) } catch { /* ignore */ }
|
||||
}
|
||||
11
core/http/react-ui/src/utils/resolveHome.js
vendored
11
core/http/react-ui/src/utils/resolveHome.js
vendored
@@ -1,11 +0,0 @@
|
||||
// Pure landing-page resolver for the index route. Returns a target path, or ''
|
||||
// meaning "render the default Home". Admin precedence is distributed > p2p >
|
||||
// plain; non-admins always go to Chat (distributed/p2p are admin-only and
|
||||
// invisible to them). Visibility gates are enforced elsewhere - this only
|
||||
// chooses where /app lands.
|
||||
export function resolveHome({ isAdmin, distributed, p2pEnabled }) {
|
||||
if (!isAdmin) return '/app/chat'
|
||||
if (distributed) return '/app/nodes'
|
||||
if (p2pEnabled) return '/app/p2p'
|
||||
return ''
|
||||
}
|
||||
20
core/http/react-ui/src/utils/sidebarPolicy.js
vendored
20
core/http/react-ui/src/utils/sidebarPolicy.js
vendored
@@ -1,20 +0,0 @@
|
||||
import { operateConsole, isConsoleItemVisible } from '../components/console/consoleConfig'
|
||||
|
||||
// The Operate > Cluster group, surfaced as a pinned top-of-sidebar quick-access
|
||||
// group when the admin is running a cluster (NATS-distributed) or a P2P swarm.
|
||||
// Items are filtered through the SAME gate as everywhere else, so e.g. in a
|
||||
// p2p-only deployment Nodes/Scheduling (feature: 'distributed') drop out and
|
||||
// only Swarm remains. Returns [] when the pin does not apply.
|
||||
export function clusterPinItems(auth, deployment) {
|
||||
if (!auth.isAdmin) return []
|
||||
if (!deployment.distributed && !deployment.p2pEnabled) return []
|
||||
const group = operateConsole.groups.find(g => g.titleKey === 'operate.cluster')
|
||||
if (!group) return []
|
||||
return group.items.filter(item => isConsoleItemVisible(item, auth))
|
||||
}
|
||||
|
||||
// In the cluster cells the Create group defaults collapsed so the pinned
|
||||
// cluster group leads. Users can still expand it; their stored choice wins.
|
||||
export function shouldCollapseCreate(auth, deployment) {
|
||||
return !!auth.isAdmin && (!!deployment.distributed || !!deployment.p2pEnabled)
|
||||
}
|
||||
@@ -79,21 +79,29 @@ func (s *GalleryStore) Create(op *GalleryOperationRecord) error {
|
||||
}).Create(op).Error
|
||||
}
|
||||
|
||||
// UpdateProgress updates progress for an operation.
|
||||
func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string) error {
|
||||
// UpdateProgress updates progress for an operation. The cancellable flag is
|
||||
// persisted on every tick so a replica that restarts mid-install rehydrates the
|
||||
// op as still cancellable — otherwise the column keeps its Create-time zero
|
||||
// value (false), the UI hides the cancel button, and the orphaned op can only
|
||||
// be dismissed by waiting for the 30-minute stale reaper.
|
||||
func (s *GalleryStore) UpdateProgress(id string, progress float64, message, downloadedSize string, cancellable bool) error {
|
||||
return s.db.Model(&GalleryOperationRecord{}).Where("id = ?", id).Updates(map[string]any{
|
||||
"progress": progress,
|
||||
"message": message,
|
||||
"downloaded_file_size": downloadedSize,
|
||||
"cancellable": cancellable,
|
||||
"updated_at": time.Now(),
|
||||
}).Error
|
||||
}
|
||||
|
||||
// UpdateStatus updates the status of an operation.
|
||||
// UpdateStatus updates the status of an operation. A terminal status is never
|
||||
// cancellable, so the flag is cleared here to keep the persisted row consistent
|
||||
// with what the UI should offer.
|
||||
func (s *GalleryStore) UpdateStatus(id, status, errMsg string) error {
|
||||
updates := map[string]any{
|
||||
"status": status,
|
||||
"updated_at": time.Now(),
|
||||
"status": status,
|
||||
"cancellable": false,
|
||||
"updated_at": time.Now(),
|
||||
}
|
||||
if errMsg != "" {
|
||||
updates["error"] = errMsg
|
||||
|
||||
56
core/services/galleryop/cancellable_persist_test.go
Normal file
56
core/services/galleryop/cancellable_persist_test.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package galleryop_test
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services/distributed"
|
||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||
"github.com/mudler/LocalAI/core/services/testutil"
|
||||
)
|
||||
|
||||
// Reproduces "an in-flight install can't be cancelled after a restart". The
|
||||
// live install path marks OpStatus.Cancellable=true on every progress tick, but
|
||||
// UpdateStatus persisted progress/status to the gallery store WITHOUT the
|
||||
// cancellable flag, and Create defaulted it to false. So after a replica
|
||||
// restart Hydrate rebuilt the op with Cancellable=false, /api/operations
|
||||
// reported cancellable:false, and the UI hid the cancel button — the orphaned
|
||||
// op lingered until the 30-minute stale reaper expired it. The cancellable
|
||||
// state must be persisted so a rehydrated in-flight op stays cancellable.
|
||||
var _ = Describe("GalleryService cancellable persistence across restart", func() {
|
||||
It("rehydrates an in-flight op as still cancellable", func() {
|
||||
db := testutil.SetupTestDB()
|
||||
store, err := distributed.NewGalleryStore(db)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
svc := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
|
||||
svc.SetGalleryStore(store)
|
||||
|
||||
// Seed the in-flight op row as the worker goroutine does on admission.
|
||||
Expect(store.Create(&distributed.GalleryOperationRecord{
|
||||
ID: "op-inflight",
|
||||
GalleryElementName: "llama-cpp-development",
|
||||
OpType: "backend_install",
|
||||
Status: "pending",
|
||||
})).To(Succeed())
|
||||
|
||||
// Simulate a progress tick: the live path always marks installs
|
||||
// cancellable while they are downloading/processing.
|
||||
svc.UpdateStatus("op-inflight", &galleryop.OpStatus{
|
||||
Message: "downloading",
|
||||
Progress: 25,
|
||||
Cancellable: true,
|
||||
})
|
||||
|
||||
// A fresh replica boots and hydrates from the store.
|
||||
fresh := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
|
||||
fresh.SetGalleryStore(store)
|
||||
Expect(fresh.Hydrate()).To(Succeed())
|
||||
|
||||
st := fresh.GetStatus("op-inflight")
|
||||
Expect(st).ToNot(BeNil(), "the in-flight op must hydrate after a restart")
|
||||
Expect(st.Cancellable).To(BeTrue(),
|
||||
"a still-active install must rehydrate as cancellable so the admin can dismiss it")
|
||||
})
|
||||
})
|
||||
@@ -167,7 +167,7 @@ func (g *GalleryService) UpdateStatus(s string, op *OpStatus) {
|
||||
xlog.Warn("Failed to persist gallery operation status", "op_id", s, "error", err)
|
||||
}
|
||||
} else {
|
||||
if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize); err != nil {
|
||||
if err := store.UpdateProgress(s, op.Progress, op.Message, op.DownloadedFileSize, op.Cancellable); err != nil {
|
||||
xlog.Warn("Failed to persist gallery operation progress", "op_id", s, "error", err)
|
||||
}
|
||||
}
|
||||
@@ -467,6 +467,7 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
|
||||
GalleryElementName: op.GalleryElementName,
|
||||
OpType: "backend_install",
|
||||
Status: "pending",
|
||||
Cancellable: true,
|
||||
})
|
||||
}
|
||||
err := g.backendHandler(&op, systemState)
|
||||
@@ -499,6 +500,8 @@ func (g *GalleryService) Start(c context.Context, cl *config.ModelConfigLoader,
|
||||
GalleryElementName: op.GalleryElementName,
|
||||
OpType: opType,
|
||||
Status: "pending",
|
||||
// A delete is not cancellable; an install is.
|
||||
Cancellable: !op.Delete,
|
||||
})
|
||||
}
|
||||
err := g.modelHandler(&op, cl, systemState)
|
||||
|
||||
@@ -19,25 +19,40 @@ import (
|
||||
// Per-replica: a single tracker instance is bound to (nodeID, modelName, replicaIndex).
|
||||
// The router constructs one tracker per Route() result, so each in-flight tick lands
|
||||
// on the correct row even when multiple replicas of the same model live on the same node.
|
||||
//
|
||||
// Embedding only grpc.ControlBackend (not the whole grpc.Backend) is what makes
|
||||
// the in-flight accounting safe by construction: the control-plane methods pass
|
||||
// through untracked, while every grpc.InferenceBackend method must be declared
|
||||
// explicitly below to satisfy grpc.Backend. Adding an inference method to the
|
||||
// interface therefore breaks this file's build (see the var assertion below)
|
||||
// until it is wrapped with track() - so a new inference path can't be added
|
||||
// without an in-flight accounting decision.
|
||||
type InFlightTrackingClient struct {
|
||||
grpc.Backend // embed for passthrough of untracked methods
|
||||
registry InFlightTracker
|
||||
nodeID string
|
||||
modelName string
|
||||
replicaIndex int
|
||||
grpc.ControlBackend // passthrough for control-plane / streaming-constructor methods
|
||||
inner grpc.InferenceBackend // tracked inference methods delegate here
|
||||
registry InFlightTracker
|
||||
nodeID string
|
||||
modelName string
|
||||
replicaIndex int
|
||||
|
||||
firstOnce sync.Once // guards onFirstComplete
|
||||
onFirstComplete func() // called once after the first tracked inference call completes
|
||||
}
|
||||
|
||||
// Compile-time contract: *InFlightTrackingClient must implement the FULL backend
|
||||
// surface. Because it embeds only ControlBackend, this fails to compile if any
|
||||
// InferenceBackend method is left unwrapped.
|
||||
var _ grpc.Backend = (*InFlightTrackingClient)(nil)
|
||||
|
||||
// NewInFlightTrackingClient wraps a gRPC backend client with in-flight tracking.
|
||||
func NewInFlightTrackingClient(inner grpc.Backend, registry InFlightTracker, nodeID, modelName string, replicaIndex int) *InFlightTrackingClient {
|
||||
return &InFlightTrackingClient{
|
||||
Backend: inner,
|
||||
registry: registry,
|
||||
nodeID: nodeID,
|
||||
modelName: modelName,
|
||||
replicaIndex: replicaIndex,
|
||||
ControlBackend: inner,
|
||||
inner: inner,
|
||||
registry: registry,
|
||||
nodeID: nodeID,
|
||||
modelName: modelName,
|
||||
replicaIndex: replicaIndex,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -91,154 +106,162 @@ func (c *InFlightTrackingClient) reconcile(err error) error {
|
||||
|
||||
func (c *InFlightTrackingClient) Predict(ctx context.Context, in *pb.PredictOptions, opts ...ggrpc.CallOption) (*pb.Reply, error) {
|
||||
defer c.track(ctx)()
|
||||
reply, err := c.Backend.Predict(ctx, in, opts...)
|
||||
reply, err := c.inner.Predict(ctx, in, opts...)
|
||||
return reply, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...ggrpc.CallOption) error {
|
||||
defer c.track(ctx)()
|
||||
return c.reconcile(c.Backend.PredictStream(ctx, in, f, opts...))
|
||||
return c.reconcile(c.inner.PredictStream(ctx, in, f, opts...))
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...ggrpc.CallOption) (*pb.EmbeddingResult, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.Embeddings(ctx, in, opts...)
|
||||
res, err := c.inner.Embeddings(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.GenerateImage(ctx, in, opts...)
|
||||
res, err := c.inner.GenerateImage(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) GenerateVideo(ctx context.Context, in *pb.GenerateVideoRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.GenerateVideo(ctx, in, opts...)
|
||||
res, err := c.inner.GenerateVideo(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) TTS(ctx context.Context, in *pb.TTSRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.TTS(ctx, in, opts...)
|
||||
res, err := c.inner.TTS(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) TTSStream(ctx context.Context, in *pb.TTSRequest, f func(reply *pb.Reply), opts ...ggrpc.CallOption) error {
|
||||
defer c.track(ctx)()
|
||||
return c.reconcile(c.Backend.TTSStream(ctx, in, f, opts...))
|
||||
return c.reconcile(c.inner.TTSStream(ctx, in, f, opts...))
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...ggrpc.CallOption) (*pb.Result, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.SoundGeneration(ctx, in, opts...)
|
||||
res, err := c.inner.SoundGeneration(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...ggrpc.CallOption) (*pb.TranscriptResult, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.AudioTranscription(ctx, in, opts...)
|
||||
res, err := c.inner.AudioTranscription(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) AudioTranscriptionStream(ctx context.Context, in *pb.TranscriptRequest, f func(chunk *pb.TranscriptStreamResponse), opts ...ggrpc.CallOption) error {
|
||||
defer c.track(ctx)()
|
||||
return c.reconcile(c.Backend.AudioTranscriptionStream(ctx, in, f, opts...))
|
||||
return c.reconcile(c.inner.AudioTranscriptionStream(ctx, in, f, opts...))
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Detect(ctx context.Context, in *pb.DetectOptions, opts ...ggrpc.CallOption) (*pb.DetectResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.Detect(ctx, in, opts...)
|
||||
res, err := c.inner.Detect(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Depth(ctx context.Context, in *pb.DepthRequest, opts ...ggrpc.CallOption) (*pb.DepthResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.Depth(ctx, in, opts...)
|
||||
res, err := c.inner.Depth(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...ggrpc.CallOption) (*pb.RerankResult, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.Rerank(ctx, in, opts...)
|
||||
res, err := c.inner.Rerank(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) VAD(ctx context.Context, in *pb.VADRequest, opts ...ggrpc.CallOption) (*pb.VADResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.VAD(ctx, in, opts...)
|
||||
res, err := c.inner.VAD(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Diarize(ctx context.Context, in *pb.DiarizeRequest, opts ...ggrpc.CallOption) (*pb.DiarizeResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.Diarize(ctx, in, opts...)
|
||||
res, err := c.inner.Diarize(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...ggrpc.CallOption) (*pb.FaceVerifyResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.FaceVerify(ctx, in, opts...)
|
||||
res, err := c.inner.FaceVerify(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) FaceAnalyze(ctx context.Context, in *pb.FaceAnalyzeRequest, opts ...ggrpc.CallOption) (*pb.FaceAnalyzeResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.FaceAnalyze(ctx, in, opts...)
|
||||
res, err := c.inner.FaceAnalyze(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) VoiceVerify(ctx context.Context, in *pb.VoiceVerifyRequest, opts ...ggrpc.CallOption) (*pb.VoiceVerifyResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.VoiceVerify(ctx, in, opts...)
|
||||
res, err := c.inner.VoiceVerify(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) VoiceAnalyze(ctx context.Context, in *pb.VoiceAnalyzeRequest, opts ...ggrpc.CallOption) (*pb.VoiceAnalyzeResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.VoiceAnalyze(ctx, in, opts...)
|
||||
res, err := c.inner.VoiceAnalyze(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) VoiceEmbed(ctx context.Context, in *pb.VoiceEmbedRequest, opts ...ggrpc.CallOption) (*pb.VoiceEmbedResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.VoiceEmbed(ctx, in, opts...)
|
||||
res, err := c.inner.VoiceEmbed(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) TokenClassify(ctx context.Context, in *pb.TokenClassifyRequest, opts ...ggrpc.CallOption) (*pb.TokenClassifyResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.TokenClassify(ctx, in, opts...)
|
||||
res, err := c.inner.TokenClassify(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) Score(ctx context.Context, in *pb.ScoreRequest, opts ...ggrpc.CallOption) (*pb.ScoreResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.Score(ctx, in, opts...)
|
||||
res, err := c.inner.Score(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) SoundDetection(ctx context.Context, in *pb.SoundDetectionRequest, opts ...ggrpc.CallOption) (*pb.SoundDetectionResponse, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.inner.SoundDetection(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) AudioEncode(ctx context.Context, in *pb.AudioEncodeRequest, opts ...ggrpc.CallOption) (*pb.AudioEncodeResult, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.AudioEncode(ctx, in, opts...)
|
||||
res, err := c.inner.AudioEncode(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) AudioDecode(ctx context.Context, in *pb.AudioDecodeRequest, opts ...ggrpc.CallOption) (*pb.AudioDecodeResult, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.AudioDecode(ctx, in, opts...)
|
||||
res, err := c.inner.AudioDecode(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
func (c *InFlightTrackingClient) AudioTransform(ctx context.Context, in *pb.AudioTransformRequest, opts ...ggrpc.CallOption) (*pb.AudioTransformResult, error) {
|
||||
defer c.track(ctx)()
|
||||
res, err := c.Backend.AudioTransform(ctx, in, opts...)
|
||||
res, err := c.inner.AudioTransform(ctx, in, opts...)
|
||||
return res, c.reconcile(err)
|
||||
}
|
||||
|
||||
// AudioTransformStream, AudioToAudioStream and Forward are deliberately left as
|
||||
// embedded passthrough: they return a stream client and the inference spans the
|
||||
// stream's lifetime, not the constructor call. Wrapping the constructor with
|
||||
// track() would increment and immediately decrement (and fire onFirstComplete)
|
||||
// before any audio flows. Tracking those correctly needs the done() func tied to
|
||||
// stream close, which the current Backend interface doesn't surface here.
|
||||
// AudioTransformStream, AudioToAudioStream and Forward live in grpc.ControlBackend
|
||||
// and are passed through via the embedded field, NOT tracked: they return a stream
|
||||
// client and the inference spans the stream's lifetime, not the constructor call.
|
||||
// Wrapping the constructor with track() would increment and immediately decrement
|
||||
// (and fire onFirstComplete) before any audio flows. Tracking those correctly needs
|
||||
// the done() func tied to stream close, which the Backend interface doesn't surface
|
||||
// here. If they ever need tracking, move them to grpc.InferenceBackend - the build
|
||||
// will then force an explicit wrapper here.
|
||||
|
||||
@@ -408,6 +408,13 @@ var _ = Describe("InFlightTrackingClient", func() {
|
||||
return err
|
||||
})
|
||||
})
|
||||
|
||||
It("SoundDetection", func() {
|
||||
assertTracked(func() error {
|
||||
_, err := client.SoundDetection(context.Background(), &pb.SoundDetectionRequest{})
|
||||
return err
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
Describe("stale model reload (self-heal)", func() {
|
||||
|
||||
@@ -185,6 +185,13 @@ It is persisted through `POST /api/settings` and read live, so a change takes
|
||||
effect on the next request without a restart. A default that names a model no
|
||||
longer loaded still appears (marked *not loaded*) so it can be toggled off.
|
||||
|
||||
The default set can also be supplied out-of-band with the
|
||||
`LOCALAI_PII_DEFAULT_DETECTORS` environment variable (comma-separated model
|
||||
names, e.g. `privacy-filter-nemotron,secret-filter`). When set it takes
|
||||
precedence over the value persisted via the UI (env > file), which is the
|
||||
right behaviour for immutable container deployments that pin filtering policy
|
||||
at boot rather than via the admin UI.
|
||||
|
||||
This is what makes `cloud-proxy` / MITM redaction work out of the box: those
|
||||
backends default to PII-enabled but ship no detector list, so without a
|
||||
default detector the filter runs with nothing to scan. Set one here and
|
||||
|
||||
@@ -1,4 +1,225 @@
|
||||
---
|
||||
- name: "lfm2.5-1.2b-instruct"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF
|
||||
description: |
|
||||
Try LFM • Docs • LEAP • Discord
|
||||
|
||||
# LFM2.5-1.2B-Instruct
|
||||
|
||||
LFM2.5 is a new family of hybrid models designed for **on-device deployment**. It builds on the LFM2 architecture with extended pre-training and reinforcement learning.
|
||||
|
||||
- **Best-in-class performance**: A 1.2B model rivaling much larger models, bringing high-quality AI to your pocket.
|
||||
- **Fast edge inference**: 239 tok/s decode on AMD CPU, 82 tok/s on mobile NPU. Runs under 1GB of memory with day-one support for llama.cpp, MLX, and vLLM.
|
||||
- **Scaled training**: Extended pre-training from 10T to 28T tokens and large-scale multi-stage reinforcement learning.
|
||||
|
||||
Find more information about LFM2.5 in our blog post.
|
||||
|
||||
## 🗒️ Model Details
|
||||
|
||||
LFM2.5-1.2B-Instruct is a general-purpose text-only model with the following features:
|
||||
|
||||
...
|
||||
license: "other"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/dxnYF2fuLpulismtFSGFi.png
|
||||
overrides:
|
||||
backend: llama-cpp
|
||||
function:
|
||||
automatic_tool_parsing_fallback: true
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
options:
|
||||
- use_jinja:true
|
||||
parameters:
|
||||
min_p: 0.15
|
||||
model: llama-cpp/models/LFM2.5-1.2B-Instruct-GGUF/LFM2.5-1.2B-Instruct-Q4_K_M.gguf
|
||||
repeat_penalty: 1.05
|
||||
temperature: 0.1
|
||||
top_k: 50
|
||||
top_p: 0.1
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/LFM2.5-1.2B-Instruct-GGUF/LFM2.5-1.2B-Instruct-Q4_K_M.gguf
|
||||
sha256: b1b3de114215d9507409a662a501a631095a479a419584e8a2ded6304b19b4f5
|
||||
uri: https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF/resolve/main/LFM2.5-1.2B-Instruct-Q4_K_M.gguf
|
||||
- name: "qwopus3.6-27b-coder-compat-mtp"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF
|
||||
description: "\U0001FA90 Qwopus-3.6-27B-Coder\nCoder SFT Release\n\nAgentic Coding & Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Dense Model\n⚡ Agentic Coding\n\U0001F6E0️ Tool Calling & Agent\n\U0001F3C6 SWE-bench Verified: 67.0% (off-thinking)\n\n\U0001F4A1 What is Qwopus-3.6-27B-Coder?\n\U0001FA90 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.\n\n\U0001F9E9 Agentic Coding\nOptimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.\n\n\U0001F6E0️ Tool Calling\nLearns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.\n\n...\n"
|
||||
license: "apache-2.0"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
- vision
|
||||
- multimodal
|
||||
- reasoning
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/66309bd090589b7c65950665/sGQKmrMc6L6guMoaB5_Y2.png
|
||||
overrides:
|
||||
backend: llama-cpp
|
||||
function:
|
||||
automatic_tool_parsing_fallback: true
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
mmproj: llama-cpp/mmproj/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/mmproj-F32.gguf
|
||||
options:
|
||||
- use_jinja:true
|
||||
- spec_type:draft-mtp
|
||||
- spec_n_max:6
|
||||
- spec_p_min:0.75
|
||||
parameters:
|
||||
model: llama-cpp/models/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
|
||||
sha256: f893632170124da60e159b7bcc9d91e1cda3014b2c6b8ad9c6cde38a1fcd2f6f
|
||||
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/resolve/main/Qwopus3.6-27B-Coder-Compat-MTP-Q4_K_M.gguf
|
||||
- filename: llama-cpp/mmproj/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/mmproj-F32.gguf
|
||||
sha256: 32f7ea0600c07272547da401d460f8abbd980f3a57b69d6df87be0e2505e0b9c
|
||||
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-Compat-MTP-GGUF/resolve/main/mmproj-F32.gguf
|
||||
- name: "kimi-k2.7-code"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF
|
||||
description: |
|
||||
## 1. Model Introduction
|
||||
|
||||
Kimi K2.7 Code is a coding-focused agentic model built upon Kimi K2.6. With substantial improvements on real-world long-horizon coding tasks, it strengthens end-to-end task completion across complex software engineering workflows while improving token efficiency, reducing thinking-token usage by approximately 30% compared with Kimi K2.6.
|
||||
|
||||
## 2. Model Summary
|
||||
|
||||
## 3. Evaluation Results
|
||||
|
||||
Benchmark
|
||||
Kimi K2.6
|
||||
Kimi K2.7 Code
|
||||
GPT-5.5
|
||||
Claude Opus 4.8
|
||||
|
||||
Coding
|
||||
|
||||
Kimi Code Bench v2
|
||||
50.9
|
||||
62.0
|
||||
69.0
|
||||
67.4
|
||||
|
||||
Program Bench
|
||||
48.3
|
||||
53.6
|
||||
69.1
|
||||
63.8
|
||||
|
||||
MLS Bench Lite
|
||||
26.7
|
||||
35.1
|
||||
35.5
|
||||
42.8
|
||||
|
||||
Agentic
|
||||
|
||||
Kimi Claw 24/7 Bench
|
||||
42.9
|
||||
46.9
|
||||
52.8
|
||||
50.4
|
||||
|
||||
MCP Atlas
|
||||
69.4
|
||||
76.0
|
||||
79.4
|
||||
81.3
|
||||
|
||||
MCP Mark Verified
|
||||
72.8
|
||||
81.1
|
||||
92.9
|
||||
76.4
|
||||
|
||||
Footnotes
|
||||
|
||||
...
|
||||
license: "other"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
icon: https://huggingface.co/moonshotai/Kimi-K2.7-Code/resolve/main/figures/kimi-logo.png
|
||||
overrides:
|
||||
backend: llama-cpp
|
||||
function:
|
||||
automatic_tool_parsing_fallback: true
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
mmproj: llama-cpp/mmproj/Kimi-K2.7-Code-GGUF/mmproj-F32.gguf
|
||||
options:
|
||||
- use_jinja:true
|
||||
parameters:
|
||||
min_p: 0.01
|
||||
model: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
|
||||
repeat_penalty: 1
|
||||
temperature: 0.6
|
||||
top_k: -1
|
||||
top_p: 0.95
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
|
||||
sha256: 65f0aca336f876902323a90e2aff32cac76d071b2cdd818c6a8d78be8fc2c680
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00001-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00002-of-00014.gguf
|
||||
sha256: 40f4416c130827a11502778891f4ef95b2144db90f51d63aa3548d0952a39683
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00002-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00003-of-00014.gguf
|
||||
sha256: ba2ba0b5168784ace7c752ecadfc3631279b2bb023824cb0fe9e2dab3dd28f22
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00003-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00004-of-00014.gguf
|
||||
sha256: 10298a6c98b13ef49be286fefbea8663e16473fb69bbeabe153bc80c60ae116e
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00004-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00005-of-00014.gguf
|
||||
sha256: 8e9e4c8e35d34fc4fef6bfb65a715ad7defbd196970d833c1df6924d701c88b3
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00005-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00006-of-00014.gguf
|
||||
sha256: ccff6e7f299742f82cf6f51a871e3eb3167511efaee967477cc8387f54d16442
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00006-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00007-of-00014.gguf
|
||||
sha256: 1a3b639633a2d22f71156a9f643ded2329cdd969cc21177b644b5741bac1af8e
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00007-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00008-of-00014.gguf
|
||||
sha256: bde28f682a1eab973538b2102007d952f37a13c1f7d55e2ed99177445ddc4282
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00008-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00009-of-00014.gguf
|
||||
sha256: b6a23a95b61e100f7593fa75e2363966323fa767b7e4fdf45d963b59e8fdc69f
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00009-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00010-of-00014.gguf
|
||||
sha256: fb10231c2e6d76921d40f22690f4aa08a8090c708edeaf7e581abafc24d3b25c
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00010-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00011-of-00014.gguf
|
||||
sha256: d2290be7ed1a22ac1f9f8a4813389689e075ce2ab8abc3aaaa1157a3cb1462d8
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00011-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00012-of-00014.gguf
|
||||
sha256: ce0d028314aa3fc783082dbca097e1055d69686a17ab8306574e2949568f26a5
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00012-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00013-of-00014.gguf
|
||||
sha256: 217864ce63a1d130ab39dcb0996b6097e1aa78eb896e38efaefdbbac3a00b7ec
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00013-of-00014.gguf
|
||||
- filename: llama-cpp/models/Kimi-K2.7-Code-GGUF/Kimi-K2.7-Code-UD-Q8_K_XL-00014-of-00014.gguf
|
||||
sha256: eb7582ad7066c5eaa01bde95acb00b4ad9cd7b07cd50a6cf5c9ee427258bc9dd
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/UD-Q8_K_XL/Kimi-K2.7-Code-UD-Q8_K_XL-00014-of-00014.gguf
|
||||
- filename: llama-cpp/mmproj/Kimi-K2.7-Code-GGUF/mmproj-F32.gguf
|
||||
sha256: b2cc50c8c13fe70fc4968a83332f31e9007ea09ebb9ae91d46a4e4cd2a3053cd
|
||||
uri: https://huggingface.co/unsloth/Kimi-K2.7-Code-GGUF/resolve/main/mmproj-F32.gguf
|
||||
- name: "qwythos-9b-claude-mythos-5-1m"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
@@ -49,33 +270,7 @@
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/GLM-5.2-GGUF
|
||||
description: |
|
||||
# GLM-5.2
|
||||
|
||||
👋 Join our WeChat or Discord community.
|
||||
|
||||
📖 Check out the GLM-5.2 blog and GLM-5 Technical report.
|
||||
|
||||
📍 Use GLM-5.2 API services on Z.ai API Platform.
|
||||
|
||||
🔜 Try GLM-5.2 here.
|
||||
|
||||
[Paper]
|
||||
[GitHub]
|
||||
|
||||
## Introduction
|
||||
|
||||
We're introducing GLM-5.2, our latest flagship model for long-horizon tasks. It marks a substantial leap in long-horizon task capability over its predecessor GLM-5.1 and, for the first time, delivers that capability on a **solid 1M-token context**. GLM-5.2's new capabilities include:
|
||||
- **Solid 1M Context:** A solid 1M-token context that stably sustains long-horizon work
|
||||
- **Advanced Coding with Flexible Effort**: Stronger coding capabilities with multiple thinking effort levels to balance performance and latency
|
||||
- **Improved Architecture**: We propose IndexShare, which reuses the same indexer across every four sparse attention layers, reducing per-token FLOPs by 2.9× at a 1M context length. We also improve GLM-5.2’s MTP layer for speculative decoding, increasing the acceptance length by up to 20%
|
||||
- **Pure Open**: An MIT open-source license — no regional limits, technical access without borders
|
||||
|
||||
## Benchmark
|
||||
|
||||
## Serve GLM-5.2 Locally
|
||||
|
||||
...
|
||||
description: "# GLM-5.2\n\n\U0001F44B Join our WeChat or Discord community.\n\n\U0001F4D6 Check out the GLM-5.2 blog and GLM-5 Technical report.\n\n\U0001F4CD Use GLM-5.2 API services on Z.ai API Platform.\n\n\U0001F51C Try GLM-5.2 here.\n\n[Paper]\n[GitHub]\n\n## Introduction\n\nWe're introducing GLM-5.2, our latest flagship model for long-horizon tasks. It marks a substantial leap in long-horizon task capability over its predecessor GLM-5.1 and, for the first time, delivers that capability on a **solid 1M-token context**. GLM-5.2's new capabilities include:\n - **Solid 1M Context:** A solid 1M-token context that stably sustains long-horizon work\n - **Advanced Coding with Flexible Effort**: Stronger coding capabilities with multiple thinking effort levels to balance performance and latency\n - **Improved Architecture**: We propose IndexShare, which reuses the same indexer across every four sparse attention layers, reducing per-token FLOPs by 2.9× at a 1M context length. We also improve GLM-5.2’s MTP layer for speculative decoding, increasing the acceptance length by up to 20%\n - **Pure Open**: An MIT open-source license — no regional limits, technical access without borders\n\n## Benchmark\n\n## Serve GLM-5.2 Locally\n\n...\n"
|
||||
license: "mit"
|
||||
tags:
|
||||
- llm
|
||||
@@ -198,26 +393,7 @@
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/michaelw9999/Qwopus3.6-27B-v2-MTP-NVFP4-GGUF
|
||||
description: |
|
||||
🪐 Qwopus3.6-27B-v2-MTP
|
||||
MTP Release
|
||||
|
||||
Multi-Token Prediction reasoning model fine-tuned from Qwen3.6-27B
|
||||
|
||||
🧬 Trace Inversion & Negentropy
|
||||
🧠 27B Parameters
|
||||
⚡ Speculative Decoding
|
||||
🛠️ Coding / DevOps / Math
|
||||
|
||||
💡 What is Qwopus3.6-27B-v2-MTP?
|
||||
🪐 Qwopus3.6-27B-v2-MTP is a speed-oriented reasoning release built on top of Qwen3.6-27B. It keeps the Qwopus line's focus on reconstructed reasoning traces, coding discipline, DevOps procedures, and mathematical derivations, while adding Multi-Token Prediction for faster generation. The goal is simple: preserve the depth and structure of a 27B reasoning model while making real interactive use noticeably faster.
|
||||
|
||||
⚡ MTP DecodingAuxiliary future-token prediction improves throughput on long reasoning, code, math, and strict-format prompts.
|
||||
🧩 Structured ReasoningInherits the Qwopus training recipe built around reconstructed step-by-step reasoning trajectories.
|
||||
🧪 GB10 TestedValidated on a 30-question local benchmark across Logic, Coding, DevOps, Math, and Edge tasks.
|
||||
🚀 Practical SpeedDesigned for workflows where strong answers matter, but waiting several extra minutes per task does not.
|
||||
|
||||
...
|
||||
description: "\U0001FA90 Qwopus3.6-27B-v2-MTP\nMTP Release\n\nMulti-Token Prediction reasoning model fine-tuned from Qwen3.6-27B\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Parameters\n⚡ Speculative Decoding\n\U0001F6E0️ Coding / DevOps / Math\n\n\U0001F4A1 What is Qwopus3.6-27B-v2-MTP?\n\U0001FA90 Qwopus3.6-27B-v2-MTP is a speed-oriented reasoning release built on top of Qwen3.6-27B. It keeps the Qwopus line's focus on reconstructed reasoning traces, coding discipline, DevOps procedures, and mathematical derivations, while adding Multi-Token Prediction for faster generation. The goal is simple: preserve the depth and structure of a 27B reasoning model while making real interactive use noticeably faster.\n\n⚡ MTP DecodingAuxiliary future-token prediction improves throughput on long reasoning, code, math, and strict-format prompts.\n\U0001F9E9 Structured ReasoningInherits the Qwopus training recipe built around reconstructed step-by-step reasoning trajectories.\n\U0001F9EA GB10 TestedValidated on a 30-question local benchmark across Logic, Coding, DevOps, Math, and Edge tasks.\n\U0001F680 Practical SpeedDesigned for workflows where strong answers matter, but waiting several extra minutes per task does not.\n\n...\n"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
@@ -243,28 +419,7 @@
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/michaelw9999/Qwopus3.6-27B-Coder-MTP-NVFP4-GGUF
|
||||
description: |
|
||||
🪐 Qwopus-3.6-27B-Coder
|
||||
Coder SFT Release
|
||||
|
||||
Agentic Coding & Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2
|
||||
|
||||
🧬 Trace Inversion & Negentropy
|
||||
🧠 27B Dense Model
|
||||
⚡ Agentic Coding
|
||||
🛠️ Tool Calling & Agent
|
||||
🏆 SWE-bench Verified: 67.0% (off-thinking)
|
||||
|
||||
💡 What is Qwopus-3.6-27B-Coder?
|
||||
🪐 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro (300ex) and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.
|
||||
|
||||
🧩 Agentic Coding
|
||||
Optimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.
|
||||
|
||||
🛠️ Tool Calling
|
||||
Learns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.
|
||||
|
||||
...
|
||||
description: "\U0001FA90 Qwopus-3.6-27B-Coder\nCoder SFT Release\n\nAgentic Coding & Tool-Use Reasoning Model Fine-Tuned on Qwopus3.6-27B-v2\n\n\U0001F9EC Trace Inversion & Negentropy\n\U0001F9E0 27B Dense Model\n⚡ Agentic Coding\n\U0001F6E0️ Tool Calling & Agent\n\U0001F3C6 SWE-bench Verified: 67.0% (off-thinking)\n\n\U0001F4A1 What is Qwopus-3.6-27B-Coder?\n\U0001FA90 Qwopus-3.6-27B-Coder is a reasoning-enhanced agentic coding model built on top of Qwopus3.6-27B-v2. It inherits the powerful reasoning foundation of the v2 base — which achieved 87.43% MMLU-Pro (300ex) and 75.25% SWE-bench Verified — and further specializes it for agentic code generation, structured tool calling, debugging, and instruction-following in developer workflows. The model is designed to excel at repository-level coding tasks, multi-turn tool orchestration, and complex logical reasoning under realistic agent environments.\n\n\U0001F9E9 Agentic Coding\nOptimized for repository-level coding, debugging, patch generation, and structured multi-step development workflows.\n\n\U0001F6E0️ Tool Calling\nLearns from real agent trajectories with tool definitions, tool calls, and environment feedback for robust multi-turn execution.\n\n...\n"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
@@ -1484,8 +1639,8 @@
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Qwopus3.6-27B-v2-MTP-GGUF/Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf
|
||||
sha256: 818d68223be4d8518dac0b3b5604dde633cbbcbae1f491d842a3e26711c6606d
|
||||
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-v2-MTP-GGUF/resolve/main/Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf
|
||||
sha256: 31cf5fc2406a0c7aaebcc26d440bf0df94e215d0589d5205bf319649c052b50a
|
||||
- name: "qwen3.6-40b-claude-4.6-opus-deckard-heretic-uncensored-thinking-neo-code-di-imatrix-max"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
|
||||
@@ -41,11 +41,34 @@ func buildClient(address string, parallel bool, wd WatchDog, enableWatchDog bool
|
||||
}
|
||||
}
|
||||
|
||||
// Backend is the full client surface of a model backend. It is deliberately
|
||||
// composed of two sub-interfaces so that wrappers can get a COMPILE-TIME
|
||||
// guarantee about which methods they must account for:
|
||||
//
|
||||
// - InferenceBackend - methods that each perform one discrete inference call
|
||||
// (the call begins on entry and ends on return). A wrapper that does
|
||||
// per-call accounting - e.g. the distributed router's in-flight tracker,
|
||||
// core/services/nodes.InFlightTrackingClient - embeds only ControlBackend
|
||||
// and implements every InferenceBackend method explicitly. Adding a method
|
||||
// to InferenceBackend therefore breaks that wrapper's build until it is
|
||||
// implemented: inference can't be added without an accounting decision.
|
||||
// - ControlBackend - everything that is NOT a discrete inference call:
|
||||
// lifecycle/control-plane operations and the streaming constructors whose
|
||||
// work spans the returned stream rather than the constructor call. These
|
||||
// are safe to pass through untracked.
|
||||
//
|
||||
// Keep the two sets disjoint; every backend method belongs to exactly one.
|
||||
type Backend interface {
|
||||
IsBusy() bool
|
||||
HealthCheck(ctx context.Context) (bool, error)
|
||||
InferenceBackend
|
||||
ControlBackend
|
||||
}
|
||||
|
||||
// InferenceBackend is the subset of Backend whose methods each map to a single
|
||||
// inference call. Wrappers that account for in-flight work must implement these
|
||||
// explicitly (see Backend). Do NOT add methods that return a stream client or
|
||||
// that are control-plane only - those belong in ControlBackend.
|
||||
type InferenceBackend interface {
|
||||
Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error)
|
||||
LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
PredictStream(ctx context.Context, in *pb.PredictOptions, f func(reply *pb.Reply), opts ...grpc.CallOption) error
|
||||
Predict(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.Reply, error)
|
||||
GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
@@ -53,6 +76,8 @@ type Backend interface {
|
||||
TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
TTSStream(ctx context.Context, in *pb.TTSRequest, f func(reply *pb.Reply), opts ...grpc.CallOption) error
|
||||
SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)
|
||||
AudioTranscriptionStream(ctx context.Context, in *pb.TranscriptRequest, f func(chunk *pb.TranscriptStreamResponse), opts ...grpc.CallOption) error
|
||||
Detect(ctx context.Context, in *pb.DetectOptions, opts ...grpc.CallOption) (*pb.DetectResponse, error)
|
||||
Depth(ctx context.Context, in *pb.DepthRequest, opts ...grpc.CallOption) (*pb.DepthResponse, error)
|
||||
FaceVerify(ctx context.Context, in *pb.FaceVerifyRequest, opts ...grpc.CallOption) (*pb.FaceVerifyResponse, error)
|
||||
@@ -60,8 +85,25 @@ type Backend interface {
|
||||
VoiceVerify(ctx context.Context, in *pb.VoiceVerifyRequest, opts ...grpc.CallOption) (*pb.VoiceVerifyResponse, error)
|
||||
VoiceAnalyze(ctx context.Context, in *pb.VoiceAnalyzeRequest, opts ...grpc.CallOption) (*pb.VoiceAnalyzeResponse, error)
|
||||
VoiceEmbed(ctx context.Context, in *pb.VoiceEmbedRequest, opts ...grpc.CallOption) (*pb.VoiceEmbedResponse, error)
|
||||
AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)
|
||||
AudioTranscriptionStream(ctx context.Context, in *pb.TranscriptRequest, f func(chunk *pb.TranscriptStreamResponse), opts ...grpc.CallOption) error
|
||||
Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)
|
||||
TokenClassify(ctx context.Context, in *pb.TokenClassifyRequest, opts ...grpc.CallOption) (*pb.TokenClassifyResponse, error)
|
||||
Score(ctx context.Context, in *pb.ScoreRequest, opts ...grpc.CallOption) (*pb.ScoreResponse, error)
|
||||
VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOption) (*pb.VADResponse, error)
|
||||
Diarize(ctx context.Context, in *pb.DiarizeRequest, opts ...grpc.CallOption) (*pb.DiarizeResponse, error)
|
||||
SoundDetection(ctx context.Context, in *pb.SoundDetectionRequest, opts ...grpc.CallOption) (*pb.SoundDetectionResponse, error)
|
||||
AudioEncode(ctx context.Context, in *pb.AudioEncodeRequest, opts ...grpc.CallOption) (*pb.AudioEncodeResult, error)
|
||||
AudioDecode(ctx context.Context, in *pb.AudioDecodeRequest, opts ...grpc.CallOption) (*pb.AudioDecodeResult, error)
|
||||
AudioTransform(ctx context.Context, in *pb.AudioTransformRequest, opts ...grpc.CallOption) (*pb.AudioTransformResult, error)
|
||||
}
|
||||
|
||||
// ControlBackend is the subset of Backend that is NOT per-call inference:
|
||||
// lifecycle/control-plane operations and the streaming constructors whose work
|
||||
// spans the returned stream rather than the constructor call. In-flight-tracking
|
||||
// wrappers embed this directly and pass it through untracked (see Backend).
|
||||
type ControlBackend interface {
|
||||
IsBusy() bool
|
||||
HealthCheck(ctx context.Context) (bool, error)
|
||||
LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
|
||||
Status(ctx context.Context) (*pb.StatusResponse, error)
|
||||
|
||||
@@ -70,24 +112,11 @@ type Backend interface {
|
||||
StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error)
|
||||
StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error)
|
||||
|
||||
Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)
|
||||
|
||||
TokenClassify(ctx context.Context, in *pb.TokenClassifyRequest, opts ...grpc.CallOption) (*pb.TokenClassifyResponse, error)
|
||||
|
||||
Score(ctx context.Context, in *pb.ScoreRequest, opts ...grpc.CallOption) (*pb.ScoreResponse, error)
|
||||
|
||||
GetTokenMetrics(ctx context.Context, in *pb.MetricsRequest, opts ...grpc.CallOption) (*pb.MetricsResponse, error)
|
||||
|
||||
VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOption) (*pb.VADResponse, error)
|
||||
|
||||
Diarize(ctx context.Context, in *pb.DiarizeRequest, opts ...grpc.CallOption) (*pb.DiarizeResponse, error)
|
||||
|
||||
SoundDetection(ctx context.Context, in *pb.SoundDetectionRequest, opts ...grpc.CallOption) (*pb.SoundDetectionResponse, error)
|
||||
|
||||
AudioEncode(ctx context.Context, in *pb.AudioEncodeRequest, opts ...grpc.CallOption) (*pb.AudioEncodeResult, error)
|
||||
AudioDecode(ctx context.Context, in *pb.AudioDecodeRequest, opts ...grpc.CallOption) (*pb.AudioDecodeResult, error)
|
||||
|
||||
AudioTransform(ctx context.Context, in *pb.AudioTransformRequest, opts ...grpc.CallOption) (*pb.AudioTransformResult, error)
|
||||
// Streaming constructors: these return a stream client immediately; the
|
||||
// actual inference spans the stream's lifetime, not this call, so they are
|
||||
// NOT tracked as a single in-flight unit.
|
||||
AudioTransformStream(ctx context.Context, opts ...grpc.CallOption) (AudioTransformStreamClient, error)
|
||||
AudioToAudioStream(ctx context.Context, opts ...grpc.CallOption) (AudioToAudioStreamClient, error)
|
||||
|
||||
|
||||
@@ -6,10 +6,11 @@ IMAGE_NAME="${IMAGE_NAME:-localai/llama-cpp-darwin}"
|
||||
|
||||
pushd backend/cpp/llama-cpp
|
||||
|
||||
# make llama-cpp-avx && \
|
||||
# make llama-cpp-avx2 && \
|
||||
# make llama-cpp-avx512 && \
|
||||
make llama-cpp-fallback && \
|
||||
# Single build via ggml CPU_ALL_VARIANTS: one binary plus the per-microarch Apple/arm
|
||||
# dylibs (apple_m1/m2_m3/m4, armv8.x) that ggml selects at runtime. GGML_METAL stays ON
|
||||
# and --target ggml also builds ggml-metal (via add_dependencies), so the Metal GPU
|
||||
# backend is still produced as a loadable libggml-metal.dylib.
|
||||
make llama-cpp-cpu-all && \
|
||||
make llama-cpp-grpc && \
|
||||
make llama-cpp-rpc-server
|
||||
|
||||
@@ -19,13 +20,24 @@ mkdir -p build/darwin
|
||||
mkdir -p backend-images
|
||||
mkdir -p build/darwin/lib
|
||||
|
||||
# cp -rf backend/cpp/llama-cpp/llama-cpp-avx build/darwin/
|
||||
# cp -rf backend/cpp/llama-cpp/llama-cpp-avx2 build/darwin/
|
||||
# cp -rf backend/cpp/llama-cpp/llama-cpp-avx512 build/darwin/
|
||||
cp -rf backend/cpp/llama-cpp/llama-cpp-fallback build/darwin/
|
||||
cp -rf backend/cpp/llama-cpp/llama-cpp-cpu-all build/darwin/
|
||||
cp -rf backend/cpp/llama-cpp/llama-cpp-grpc build/darwin/
|
||||
cp -rf backend/cpp/llama-cpp/llama-cpp-rpc-server build/darwin/
|
||||
|
||||
# Distribute the shared ggml/llama libraries from the CPU_ALL_VARIANTS build. Unlike the
|
||||
# old fully-static fallback build, these have @rpath install names, so the otool loop below
|
||||
# (which only copies deps that exist on disk) will not pick them up. The split is by suffix:
|
||||
# - ggml emits its loadable backends (per-microarch CPU variants, metal, blas) with a .so
|
||||
# suffix EVEN ON DARWIN. These go in the package ROOT next to the binary, because darwin
|
||||
# run.sh execs the binary directly (no bundled ld.so) so ggml's executable-directory
|
||||
# scan looks there.
|
||||
# - the core libraries (libggml-base/libggml/libllama/libllama-common/libmtmd) use the
|
||||
# platform .dylib suffix and are NEEDED deps; they go in lib/, resolved at load time via
|
||||
# the DYLD_LIBRARY_PATH=lib that run.sh exports. -a preserves the version symlinks.
|
||||
SHLIBS=backend/cpp/llama-cpp/ggml-shared-libs
|
||||
cp -a $SHLIBS/*.so build/darwin/
|
||||
cp -a $SHLIBS/*.dylib build/darwin/lib/
|
||||
|
||||
# Set default additional libs only for Darwin on M chips (arm64)
|
||||
if [[ "$(uname -s)" == "Darwin" && "$(uname -m)" == "arm64" ]]; then
|
||||
ADDITIONAL_LIBS=${ADDITIONAL_LIBS:-$(ls /opt/homebrew/Cellar/protobuf/**/lib/libutf8_validity*.dylib 2>/dev/null)}
|
||||
|
||||
@@ -53,12 +53,13 @@ var _ = Describe("Gallery Distributed", Label("Distributed"), func() {
|
||||
Expect(retrieved.Status).To(Equal("downloading"))
|
||||
Expect(retrieved.FrontendID).To(Equal("f1"))
|
||||
|
||||
// Update progress
|
||||
Expect(galleryStore.UpdateProgress(op.ID, 0.75, "75% complete", "6GB")).To(Succeed())
|
||||
// Update progress (cancellable: a downloading install can be cancelled)
|
||||
Expect(galleryStore.UpdateProgress(op.ID, 0.75, "75% complete", "6GB", true)).To(Succeed())
|
||||
|
||||
updated, _ := galleryStore.Get(op.ID)
|
||||
Expect(updated.Progress).To(BeNumerically("~", 0.75, 0.01))
|
||||
Expect(updated.Message).To(Equal("75% complete"))
|
||||
Expect(updated.Cancellable).To(BeTrue())
|
||||
|
||||
// Complete
|
||||
Expect(galleryStore.UpdateStatus(op.ID, "completed", "")).To(Succeed())
|
||||
|
||||
@@ -104,11 +104,12 @@ var _ = Describe("Phase 4: MCP, Skills, Gallery, Fine-Tuning", Label("Distribute
|
||||
}
|
||||
stores.Gallery.Create(op)
|
||||
|
||||
Expect(stores.Gallery.UpdateProgress(op.ID, 0.5, "50% complete", "2GB")).To(Succeed())
|
||||
Expect(stores.Gallery.UpdateProgress(op.ID, 0.5, "50% complete", "2GB", true)).To(Succeed())
|
||||
|
||||
updated, _ := stores.Gallery.Get(op.ID)
|
||||
Expect(updated.Progress).To(BeNumerically("~", 0.5, 0.01))
|
||||
Expect(updated.Message).To(Equal("50% complete"))
|
||||
Expect(updated.Cancellable).To(BeTrue())
|
||||
})
|
||||
|
||||
It("should deduplicate concurrent downloads", func() {
|
||||
|
||||
Reference in New Issue
Block a user