Compare commits

..

2 Commits

Author SHA1 Message Date
Ettore Di Giacinto
56c50c4b66 fix(backends): darwin/metal support across purego Go backends
The parakeet-cpp fix in the previous commit was an instance of a bug
shared by nearly every purego/dlopen Go backend: the dlopen target was
hardcoded to a .so name and run.sh exported only LD_LIBRARY_PATH, so the
backend panicked at startup on macOS/Apple-Metal nodes (dyld needs the
.dylib name and DYLD_LIBRARY_PATH). voxtral was the only backend handling
this correctly.

Apply the same four-layer fix (mirroring backend/go/voxtral) to the
remaining affected backends:

  whisper, sherpa-onnx, ced, stablediffusion-ggml, vibevoice-cpp,
  qwen3-tts-cpp, omnivoice-cpp, crispasr, acestep-cpp, locate-anything-cpp,
  depth-anything-cpp, rfdetr-cpp, sam3-cpp, localvqe

Per backend:
- main.go (sherpa-onnx: backend.go, two libraries): default the dlopen
  target to the .dylib on darwin (runtime.GOOS), .so elsewhere; the
  existing <BACKEND>_LIBRARY env override still wins.
- run.sh: on Darwin set DYLD_LIBRARY_PATH and point <BACKEND>_LIBRARY at
  the packaged .dylib; keep LD_LIBRARY_PATH + the Linux CPU-variant
  (avx/avx2/avx512) selection unchanged in the else branch.
- package.sh: also bundle the .dylib and stop hard-failing when no .so is
  present (the macOS case).
- Makefile: also stage the built .dylib.

Notes:
- stablediffusion-ggml and acestep-cpp build their lib as a CMake MODULE,
  which emits .so (not .dylib) on macOS; run.sh prefers .dylib and falls
  back to .so, so both layouts work.
- sherpa-onnx was already partly darwin-aware (Makefile/package.sh); only
  run.sh and the two dlopen defaults needed fixing.

Linux behavior is unchanged. Verified that every backend is gofmt-clean and
builds with `CGO_ENABLED=0 go build`.

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-24 16:20:57 +00:00
Ettore Di Giacinto
9f7c58d69c fix(parakeet-cpp): darwin/metal support (libparakeet.dylib + DYLD path)
The parakeet-cpp backend had no macOS support and panicked at startup on
Apple/Metal nodes when purego.Dlopen could not find "libparakeet.so".
Fix it across the same four layers the sibling voxtral backend already
handles correctly:

- main.go: default the dlopen target to libparakeet.dylib on darwin
  (runtime.GOOS), libparakeet.so elsewhere; PARAKEET_LIBRARY still wins.
- Makefile: also stage the built libparakeet.dylib next to the Go sources.
- package.sh: accept either the Linux .so[.X.Y] or the macOS .dylib when
  bundling instead of hard-failing when no .so is present (the macOS case);
  note that on Darwin only system frameworks are linked.
- run.sh: on Darwin set DYLD_LIBRARY_PATH and PARAKEET_LIBRARY to the
  packaged .dylib; keep LD_LIBRARY_PATH + .so on Linux.

Mirrors backend/go/voxtral.

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-24 16:03:06 +00:00
100 changed files with 519 additions and 1261 deletions

View File

@@ -4974,12 +4974,6 @@ includeDarwin:
- backend: "kitten-tts" - backend: "kitten-tts"
tag-suffix: "-metal-darwin-arm64-kitten-tts" tag-suffix: "-metal-darwin-arm64-kitten-tts"
build-type: "mps" build-type: "mps"
- backend: "trl"
tag-suffix: "-metal-darwin-arm64-trl"
build-type: "mps"
- backend: "liquid-audio"
tag-suffix: "-metal-darwin-arm64-liquid-audio"
build-type: "mps"
- backend: "piper" - backend: "piper"
tag-suffix: "-metal-darwin-arm64-piper" tag-suffix: "-metal-darwin-arm64-piper"
build-type: "metal" build-type: "metal"
@@ -4996,10 +4990,6 @@ includeDarwin:
tag-suffix: "-metal-darwin-arm64-sherpa-onnx" tag-suffix: "-metal-darwin-arm64-sherpa-onnx"
build-type: "metal" build-type: "metal"
lang: "go" lang: "go"
- backend: "supertonic"
tag-suffix: "-metal-darwin-arm64-supertonic"
build-type: "metal"
lang: "go"
- backend: "local-store" - backend: "local-store"
tag-suffix: "-metal-darwin-arm64-local-store" tag-suffix: "-metal-darwin-arm64-local-store"
build-type: "metal" build-type: "metal"

View File

@@ -117,7 +117,8 @@ libgoacestepcpp-custom: CMakeLists.txt cpp/goacestepcpp.cpp cpp/goacestepcpp.h
cmake .. $(CMAKE_ARGS) && \ cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) --target goacestepcpp && \ cmake --build . --config Release -j$(JOBS) --target goacestepcpp && \
cd .. && \ cd .. && \
mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET) (mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET) 2>/dev/null || \
mv build-$(SO_TARGET)/libgoacestepcpp.dylib ./$(SO_TARGET) 2>/dev/null)
test: acestep-cpp test: acestep-cpp
@echo "Running acestep-cpp tests..." @echo "Running acestep-cpp tests..."

View File

@@ -4,6 +4,7 @@ package main
import ( import (
"flag" "flag"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -22,7 +23,11 @@ func main() {
// Get library name from environment variable, default to fallback // Get library name from environment variable, default to fallback
libName := os.Getenv("ACESTEP_LIBRARY") libName := os.Getenv("ACESTEP_LIBRARY")
if libName == "" { if libName == "" {
libName = "./libgoacestepcpp-fallback.so" if runtime.GOOS == "darwin" {
libName = "./libgoacestepcpp-fallback.dylib"
} else {
libName = "./libgoacestepcpp-fallback.so"
}
} }
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -13,6 +13,7 @@ mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/acestep-cpp $CURDIR/package/ cp -avf $CURDIR/acestep-cpp $CURDIR/package/
cp -fv $CURDIR/libgoacestepcpp-*.so $CURDIR/package/ cp -fv $CURDIR/libgoacestepcpp-*.so $CURDIR/package/
cp -fv $CURDIR/libgoacestepcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/run.sh $CURDIR/package/ cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries # Detect architecture and copy appropriate libraries

View File

@@ -12,9 +12,19 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1 grep -e "flags" /proc/cpuinfo | head -1
fi fi
LIBRARY="$CURDIR/libgoacestepcpp-fallback.so" if [ "$(uname)" = "Darwin" ]; then
# macOS: single library variant (Metal or Accelerate). On Apple, CMake
# emits a .dylib for a SHARED library but a .so for a MODULE library, and
# the goacestepcpp target is built as a MODULE, so prefer .dylib and fall
# back to .so.
LIBRARY="$CURDIR/libgoacestepcpp-fallback.dylib"
if [ ! -e "$LIBRARY" ]; then
LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
fi
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
else
LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK" echo "CPU: AVX found OK"
if [ -e $CURDIR/libgoacestepcpp-avx.so ]; then if [ -e $CURDIR/libgoacestepcpp-avx.so ]; then
@@ -36,9 +46,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgoacestepcpp-avx512.so" LIBRARY="$CURDIR/libgoacestepcpp-avx512.so"
fi fi
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export ACESTEP_LIBRARY=$LIBRARY export ACESTEP_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it # If there is a lib/ld.so, use it

View File

@@ -57,6 +57,7 @@ libced.so: sources/ced.cpp
cmake -B sources/ced.cpp/build-shared -S sources/ced.cpp $(CMAKE_ARGS) cmake -B sources/ced.cpp/build-shared -S sources/ced.cpp $(CMAKE_ARGS)
cmake --build sources/ced.cpp/build-shared --config Release -j$(JOBS) cmake --build sources/ced.cpp/build-shared --config Release -j$(JOBS)
cp -fv sources/ced.cpp/build-shared/libced.so* ./ 2>/dev/null || true cp -fv sources/ced.cpp/build-shared/libced.so* ./ 2>/dev/null || true
cp -fv sources/ced.cpp/build-shared/libced.dylib ./ 2>/dev/null || true
cp -fv sources/ced.cpp/include/ced_capi.h ./ cp -fv sources/ced.cpp/include/ced_capi.h ./
ced-grpc: libced.so main.go goced.go ced-grpc: libced.so main.go goced.go

View File

@@ -12,6 +12,7 @@ import (
"flag" "flag"
"fmt" "fmt"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -27,7 +28,11 @@ type libFunc struct {
func main() { func main() {
libName := os.Getenv("CED_LIBRARY") libName := os.Getenv("CED_LIBRARY")
if libName == "" { if libName == "" {
libName = "libced.so" if runtime.GOOS == "darwin" {
libName = "libced.dylib"
} else {
libName = "libced.so"
}
} }
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
if err != nil { if err != nil {

View File

@@ -15,10 +15,12 @@ mkdir -p "$CURDIR/package/lib"
cp -avf "$CURDIR/ced-grpc" "$CURDIR/package/" cp -avf "$CURDIR/ced-grpc" "$CURDIR/package/"
cp -avf "$CURDIR/run.sh" "$CURDIR/package/" cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
cp -avf "$CURDIR"/libced.so* "$CURDIR/package/lib/" 2>/dev/null || { cp -avf "$CURDIR"/libced.so* "$CURDIR/package/lib/" 2>/dev/null || true
echo "ERROR: libced.so not found in $CURDIR, run 'make' first" >&2 cp -avf "$CURDIR"/libced.dylib "$CURDIR/package/lib/" 2>/dev/null || true
if ! ls "$CURDIR"/package/lib/libced.* >/dev/null 2>&1; then
echo "ERROR: libced shared library not found in $CURDIR, run 'make' first" >&2
exit 1 exit 1
} fi
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
echo "Detected x86_64 architecture, copying x86_64 libraries..." echo "Detected x86_64 architecture, copying x86_64 libraries..."

View File

@@ -3,7 +3,12 @@ set -e
CURDIR=$(dirname "$(realpath "$0")") CURDIR=$(dirname "$(realpath "$0")")
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}" if [ "$(uname)" = "Darwin" ]; then
export DYLD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${DYLD_LIBRARY_PATH:-}"
export CED_LIBRARY="$CURDIR/lib/libced.dylib"
else
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
fi
# If a self-contained ld.so was packaged, route through it so the packaged # If a self-contained ld.so was packaged, route through it so the packaged
# libc / libstdc++ are used instead of the host's (matches the sibling backends). # libc / libstdc++ are used instead of the host's (matches the sibling backends).

View File

@@ -75,7 +75,8 @@ UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux) ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libgocrispasr-avx.so libgocrispasr-avx2.so libgocrispasr-avx512.so libgocrispasr-fallback.so VARIANT_TARGETS = libgocrispasr-avx.so libgocrispasr-avx2.so libgocrispasr-avx512.so libgocrispasr-fallback.so
else else
VARIANT_TARGETS = libgocrispasr-fallback.so # On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
VARIANT_TARGETS = libgocrispasr-fallback.dylib
endif endif
crispasr: main.go gocrispasr.go $(VARIANT_TARGETS) crispasr: main.go gocrispasr.go $(VARIANT_TARGETS)
@@ -87,7 +88,7 @@ package: crispasr
build: package build: package
clean: purge clean: purge
rm -rf libgocrispasr*.so package sources/CrispASR crispasr rm -rf libgocrispasr*.so libgocrispasr*.dylib package sources/CrispASR crispasr
purge: purge:
rm -rf build* rm -rf build*
@@ -118,13 +119,21 @@ libgocrispasr-fallback.so: sources/CrispASR
SO_TARGET=libgocrispasr-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom SO_TARGET=libgocrispasr-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom
rm -rfv build* rm -rfv build*
# Build fallback variant as a dylib (Darwin)
libgocrispasr-fallback.dylib: sources/CrispASR
$(MAKE) purge
$(info ${GREEN}I crispasr build info:fallback (dylib)${RESET})
SO_TARGET=libgocrispasr-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom
rm -rfv build*
libgocrispasr-custom: CMakeLists.txt cpp/crispasr_shim.cpp cpp/crispasr_shim.h libgocrispasr-custom: CMakeLists.txt cpp/crispasr_shim.cpp cpp/crispasr_shim.h
mkdir -p build-$(SO_TARGET) && \ mkdir -p build-$(SO_TARGET) && \
cd build-$(SO_TARGET) && \ cd build-$(SO_TARGET) && \
cmake .. $(CMAKE_ARGS) && \ cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \ cmake --build . --config Release -j$(JOBS) && \
cd .. && \ cd .. && \
mv build-$(SO_TARGET)/libgocrispasr.so ./$(SO_TARGET) (mv build-$(SO_TARGET)/libgocrispasr.so ./$(SO_TARGET) 2>/dev/null || \
mv build-$(SO_TARGET)/libgocrispasr.dylib ./$(SO_TARGET) 2>/dev/null)
test: crispasr test: crispasr
CGO_ENABLED=0 $(GOCMD) test -v ./... CGO_ENABLED=0 $(GOCMD) test -v ./...

View File

@@ -4,6 +4,7 @@ package main
import ( import (
"flag" "flag"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
func main() { func main() {
libName := os.Getenv("CRISPASR_LIBRARY") libName := os.Getenv("CRISPASR_LIBRARY")
if libName == "" { if libName == "" {
libName = "./libgocrispasr-fallback.so" if runtime.GOOS == "darwin" {
libName = "./libgocrispasr-fallback.dylib"
} else {
libName = "./libgocrispasr-fallback.so"
}
} }
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
mkdir -p $CURDIR/package/lib mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/crispasr $CURDIR/package/ cp -avf $CURDIR/crispasr $CURDIR/package/
cp -fv $CURDIR/libgocrispasr-*.so $CURDIR/package/ cp -fv $CURDIR/libgocrispasr-*.so $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/libgocrispasr-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/run.sh $CURDIR/package/ cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries # Detect architecture and copy appropriate libraries

View File

@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1 grep -e "flags" /proc/cpuinfo | head -1
fi fi
LIBRARY="$CURDIR/libgocrispasr-fallback.so" if [ "$(uname)" = "Darwin" ]; then
# macOS: single dylib variant (Metal or Accelerate)
LIBRARY="$CURDIR/libgocrispasr-fallback.dylib"
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
else
LIBRARY="$CURDIR/libgocrispasr-fallback.so"
if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK" echo "CPU: AVX found OK"
if [ -e $CURDIR/libgocrispasr-avx.so ]; then if [ -e $CURDIR/libgocrispasr-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgocrispasr-avx512.so" LIBRARY="$CURDIR/libgocrispasr-avx512.so"
fi fi
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export CRISPASR_LIBRARY=$LIBRARY export CRISPASR_LIBRARY=$LIBRARY
# Point piper's espeak-ng phonemizer at the bundled voice data. The variable # Point piper's espeak-ng phonemizer at the bundled voice data. The variable

View File

@@ -77,7 +77,7 @@ ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libdepthanythingcpp-avx.so libdepthanythingcpp-avx2.so libdepthanythingcpp-avx512.so libdepthanythingcpp-fallback.so VARIANT_TARGETS = libdepthanythingcpp-avx.so libdepthanythingcpp-avx2.so libdepthanythingcpp-avx512.so libdepthanythingcpp-fallback.so
else else
# On non-Linux (e.g., Darwin), build only fallback variant # On non-Linux (e.g., Darwin), build only fallback variant
VARIANT_TARGETS = libdepthanythingcpp-fallback.so VARIANT_TARGETS = libdepthanythingcpp-fallback.dylib
endif endif
depth-anything-cpp: main.go godepthanythingcpp.go $(VARIANT_TARGETS) depth-anything-cpp: main.go godepthanythingcpp.go $(VARIANT_TARGETS)
@@ -89,7 +89,7 @@ package: depth-anything-cpp
build: package build: package
clean: purge clean: purge
rm -rf libdepthanythingcpp*.so depth-anything-cpp package sources rm -rf libdepthanythingcpp*.so libdepthanythingcpp*.dylib depth-anything-cpp package sources
purge: purge:
rm -rf build* rm -rf build*
@@ -116,11 +116,19 @@ libdepthanythingcpp-avx512.so: sources/depth-anything.cpp
endif endif
# Build fallback variant (all platforms) # Build fallback variant (all platforms)
ifeq ($(UNAME_S),Darwin)
libdepthanythingcpp-fallback.dylib: sources/depth-anything.cpp
rm -rfv build-$@
$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
rm -rfv build-$@
else
libdepthanythingcpp-fallback.so: sources/depth-anything.cpp libdepthanythingcpp-fallback.so: sources/depth-anything.cpp
rm -rfv build-$@ rm -rfv build-$@
$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET}) $(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
rm -rfv build-$@ rm -rfv build-$@
endif
libdepthanythingcpp-custom: CMakeLists.txt libdepthanythingcpp-custom: CMakeLists.txt
mkdir -p build-$(SO_TARGET) && \ mkdir -p build-$(SO_TARGET) && \
@@ -128,7 +136,8 @@ libdepthanythingcpp-custom: CMakeLists.txt
cmake .. $(CMAKE_ARGS) && \ cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \ cmake --build . --config Release -j$(JOBS) && \
cd .. && \ cd .. && \
mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET) (mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET) 2>/dev/null || \
mv build-$(SO_TARGET)/libdepthanything.dylib ./$(SO_TARGET) 2>/dev/null)
all: depth-anything-cpp package all: depth-anything-cpp package

View File

@@ -9,6 +9,7 @@ package main
import ( import (
"flag" "flag"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -27,7 +28,11 @@ func main() {
// Get library name from environment variable, default to fallback // Get library name from environment variable, default to fallback
libName := os.Getenv("DEPTHANYTHING_LIBRARY") libName := os.Getenv("DEPTHANYTHING_LIBRARY")
if libName == "" { if libName == "" {
libName = "./libdepthanythingcpp-fallback.so" if runtime.GOOS == "darwin" {
libName = "./libdepthanythingcpp-fallback.dylib"
} else {
libName = "./libdepthanythingcpp-fallback.so"
}
} }
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
# Create lib directory # Create lib directory
mkdir -p $CURDIR/package/lib mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/libdepthanythingcpp-*.so $CURDIR/package/ cp -fv $CURDIR/libdepthanythingcpp-*.so $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/libdepthanythingcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -avf $CURDIR/depth-anything-cpp $CURDIR/package/ cp -avf $CURDIR/depth-anything-cpp $CURDIR/package/
cp -fv $CURDIR/run.sh $CURDIR/package/ cp -fv $CURDIR/run.sh $CURDIR/package/

View File

@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1 grep -e "flags" /proc/cpuinfo | head -1
fi fi
LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so" if [ "$(uname)" = "Darwin" ]; then
# macOS: single dylib variant (Metal or Accelerate)
LIBRARY="$CURDIR/libdepthanythingcpp-fallback.dylib"
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
else
LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"
if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK" echo "CPU: AVX found OK"
if [ -e $CURDIR/libdepthanythingcpp-avx.so ]; then if [ -e $CURDIR/libdepthanythingcpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libdepthanythingcpp-avx512.so" LIBRARY="$CURDIR/libdepthanythingcpp-avx512.so"
fi fi
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export DEPTHANYTHING_LIBRARY=$LIBRARY export DEPTHANYTHING_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it # If there is a lib/ld.so, use it

View File

@@ -67,8 +67,9 @@ $(LIB_SENTINEL): sources/LocalVQE
# that the loader picks at runtime. We must build every target — the # that the loader picks at runtime. We must build every target — the
# default `--target localvqe_shared` drops these. CMAKE_LIBRARY_OUTPUT_DIRECTORY # default `--target localvqe_shared` drops these. CMAKE_LIBRARY_OUTPUT_DIRECTORY
# routes all of them into build/bin; copy them out next to the binary. # routes all of them into build/bin; copy them out next to the binary.
cp -P build/bin/liblocalvqe.so* . 2>/dev/null || cp -P build/liblocalvqe.so* . cp -P build/bin/liblocalvqe.so* . 2>/dev/null || cp -P build/bin/liblocalvqe.dylib . 2>/dev/null || cp -P build/liblocalvqe.so* . 2>/dev/null || cp -P build/liblocalvqe.dylib .
cp -P build/bin/libggml*.so* . 2>/dev/null || true cp -P build/bin/libggml*.so* . 2>/dev/null || true
cp -P build/bin/libggml*.dylib . 2>/dev/null || true
touch $(LIB_SENTINEL) touch $(LIB_SENTINEL)
liblocalvqe.so: $(LIB_SENTINEL) liblocalvqe.so: $(LIB_SENTINEL)

View File

@@ -4,6 +4,7 @@ package main
import ( import (
"flag" "flag"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
func main() { func main() {
libName := os.Getenv("LOCALVQE_LIBRARY") libName := os.Getenv("LOCALVQE_LIBRARY")
if libName == "" { if libName == "" {
libName = "./liblocalvqe.so" if runtime.GOOS == "darwin" {
libName = "./liblocalvqe.dylib"
} else {
libName = "./liblocalvqe.so"
}
} }
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -15,7 +15,9 @@ cp -avf $CURDIR/localvqe $CURDIR/package/
# liblocalvqe.so* (with SOVERSION symlinks) and the libggml-*.so runtime # liblocalvqe.so* (with SOVERSION symlinks) and the libggml-*.so runtime
# variants — LocalVQE picks the matching CPU variant at load time. # variants — LocalVQE picks the matching CPU variant at load time.
cp -P $CURDIR/liblocalvqe.so* $CURDIR/package/ 2>/dev/null || true cp -P $CURDIR/liblocalvqe.so* $CURDIR/package/ 2>/dev/null || true
cp -P $CURDIR/liblocalvqe.dylib $CURDIR/package/ 2>/dev/null || true
cp -P $CURDIR/libggml*.so* $CURDIR/package/ 2>/dev/null || true cp -P $CURDIR/libggml*.so* $CURDIR/package/ 2>/dev/null || true
cp -P $CURDIR/libggml*.dylib $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/run.sh $CURDIR/package/ cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries # Detect architecture and copy appropriate libraries

View File

@@ -10,8 +10,19 @@ CURDIR=$(dirname "$(realpath $0)")
# exec'ing the binary. # exec'ing the binary.
cd "$CURDIR" cd "$CURDIR"
export LD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$LD_LIBRARY_PATH if [ "$(uname)" = "Darwin" ]; then
export LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so # macOS: LocalVQE is built as a SHARED library, so dyld needs the .dylib +
# DYLD_LIBRARY_PATH. Prefer .dylib and fall back to .so just in case.
export DYLD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$DYLD_LIBRARY_PATH
LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.dylib
if [ ! -e "$LOCALVQE_LIBRARY" ]; then
LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
fi
export LOCALVQE_LIBRARY
else
export LD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$LD_LIBRARY_PATH
export LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
fi
if [ -f $CURDIR/lib/ld.so ]; then if [ -f $CURDIR/lib/ld.so ]; then
echo "Using lib/ld.so" echo "Using lib/ld.so"

View File

@@ -70,7 +70,7 @@ ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = liblocateanythingcpp-avx.so liblocateanythingcpp-avx2.so liblocateanythingcpp-avx512.so liblocateanythingcpp-fallback.so VARIANT_TARGETS = liblocateanythingcpp-avx.so liblocateanythingcpp-avx2.so liblocateanythingcpp-avx512.so liblocateanythingcpp-fallback.so
else else
# On non-Linux (e.g., Darwin), build only fallback variant # On non-Linux (e.g., Darwin), build only fallback variant
VARIANT_TARGETS = liblocateanythingcpp-fallback.so VARIANT_TARGETS = liblocateanythingcpp-fallback.dylib
endif endif
locate-anything-cpp: main.go golocateanythingcpp.go $(VARIANT_TARGETS) locate-anything-cpp: main.go golocateanythingcpp.go $(VARIANT_TARGETS)
@@ -82,7 +82,7 @@ package: locate-anything-cpp
build: package build: package
clean: purge clean: purge
rm -rf liblocateanythingcpp*.so locate-anything-cpp package sources rm -rf liblocateanythingcpp*.so liblocateanythingcpp*.dylib locate-anything-cpp package sources
purge: purge:
rm -rf build* rm -rf build*
@@ -109,11 +109,19 @@ liblocateanythingcpp-avx512.so: sources/locate-anything.cpp
endif endif
# Build fallback variant (all platforms) # Build fallback variant (all platforms)
ifeq ($(UNAME_S),Darwin)
liblocateanythingcpp-fallback.dylib: sources/locate-anything.cpp
rm -rfv build-$@
$(info ${GREEN}I locate-anything-cpp build info:fallback${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom
rm -rfv build-$@
else
liblocateanythingcpp-fallback.so: sources/locate-anything.cpp liblocateanythingcpp-fallback.so: sources/locate-anything.cpp
rm -rfv build-$@ rm -rfv build-$@
$(info ${GREEN}I locate-anything-cpp build info:fallback${RESET}) $(info ${GREEN}I locate-anything-cpp build info:fallback${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom
rm -rfv build-$@ rm -rfv build-$@
endif
liblocateanythingcpp-custom: CMakeLists.txt liblocateanythingcpp-custom: CMakeLists.txt
mkdir -p build-$(SO_TARGET) && \ mkdir -p build-$(SO_TARGET) && \
@@ -121,7 +129,8 @@ liblocateanythingcpp-custom: CMakeLists.txt
cmake .. $(CMAKE_ARGS) && \ cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \ cmake --build . --config Release -j$(JOBS) && \
cd .. && \ cd .. && \
mv build-$(SO_TARGET)/liblocateanythingcpp.so ./$(SO_TARGET) (mv build-$(SO_TARGET)/liblocateanythingcpp.so ./$(SO_TARGET) 2>/dev/null || \
mv build-$(SO_TARGET)/liblocateanythingcpp.dylib ./$(SO_TARGET) 2>/dev/null)
all: locate-anything-cpp package all: locate-anything-cpp package

View File

@@ -9,6 +9,7 @@ package main
import ( import (
"flag" "flag"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -27,7 +28,11 @@ func main() {
// Get library name from environment variable, default to fallback // Get library name from environment variable, default to fallback
libName := os.Getenv("LOCATEANYTHING_LIBRARY") libName := os.Getenv("LOCATEANYTHING_LIBRARY")
if libName == "" { if libName == "" {
libName = "./liblocateanythingcpp-fallback.so" if runtime.GOOS == "darwin" {
libName = "./liblocateanythingcpp-fallback.dylib"
} else {
libName = "./liblocateanythingcpp-fallback.so"
}
} }
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
# Create lib directory # Create lib directory
mkdir -p $CURDIR/package/lib mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/liblocateanythingcpp-*.so $CURDIR/package/ cp -fv $CURDIR/liblocateanythingcpp-*.so $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/liblocateanythingcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -avf $CURDIR/locate-anything-cpp $CURDIR/package/ cp -avf $CURDIR/locate-anything-cpp $CURDIR/package/
cp -fv $CURDIR/run.sh $CURDIR/package/ cp -fv $CURDIR/run.sh $CURDIR/package/

View File

@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1 grep -e "flags" /proc/cpuinfo | head -1
fi fi
LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so" if [ "$(uname)" = "Darwin" ]; then
# macOS: single dylib variant (Metal or Accelerate)
LIBRARY="$CURDIR/liblocateanythingcpp-fallback.dylib"
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
else
LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so"
if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK" echo "CPU: AVX found OK"
if [ -e $CURDIR/liblocateanythingcpp-avx.so ]; then if [ -e $CURDIR/liblocateanythingcpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/liblocateanythingcpp-avx512.so" LIBRARY="$CURDIR/liblocateanythingcpp-avx512.so"
fi fi
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export LOCATEANYTHING_LIBRARY=$LIBRARY export LOCATEANYTHING_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it # If there is a lib/ld.so, use it

View File

@@ -65,7 +65,8 @@ UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux) ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libgomnivoicecpp-avx.so libgomnivoicecpp-avx2.so libgomnivoicecpp-avx512.so libgomnivoicecpp-fallback.so VARIANT_TARGETS = libgomnivoicecpp-avx.so libgomnivoicecpp-avx2.so libgomnivoicecpp-avx512.so libgomnivoicecpp-fallback.so
else else
VARIANT_TARGETS = libgomnivoicecpp-fallback.so # On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
VARIANT_TARGETS = libgomnivoicecpp-fallback.dylib
endif endif
omnivoice-cpp: main.go gomnivoicecpp.go $(VARIANT_TARGETS) omnivoice-cpp: main.go gomnivoicecpp.go $(VARIANT_TARGETS)
@@ -77,7 +78,7 @@ package: omnivoice-cpp
build: package build: package
clean: purge clean: purge
rm -rf libgomnivoicecpp*.so package sources/omnivoice.cpp omnivoice-cpp rm -rf libgomnivoicecpp*.so libgomnivoicecpp*.dylib package sources/omnivoice.cpp omnivoice-cpp
purge: purge:
rm -rf build* rm -rf build*
@@ -106,13 +107,20 @@ libgomnivoicecpp-fallback.so: sources/omnivoice.cpp
SO_TARGET=libgomnivoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom SO_TARGET=libgomnivoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom
rm -rf build-libgomnivoicecpp-fallback.so rm -rf build-libgomnivoicecpp-fallback.so
# Build fallback variant as a dylib (Darwin)
libgomnivoicecpp-fallback.dylib: sources/omnivoice.cpp
$(info ${GREEN}I omnivoice-cpp build info:fallback (dylib)${RESET})
SO_TARGET=libgomnivoicecpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom
rm -rf build-libgomnivoicecpp-fallback.dylib
libgomnivoicecpp-custom: CMakeLists.txt cpp/gomnivoicecpp.cpp cpp/gomnivoicecpp.h libgomnivoicecpp-custom: CMakeLists.txt cpp/gomnivoicecpp.cpp cpp/gomnivoicecpp.h
mkdir -p build-$(SO_TARGET) && \ mkdir -p build-$(SO_TARGET) && \
cd build-$(SO_TARGET) && \ cd build-$(SO_TARGET) && \
cmake .. $(CMAKE_ARGS) && \ cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) --target gomnivoicecpp && \ cmake --build . --config Release -j$(JOBS) --target gomnivoicecpp && \
cd .. && \ cd .. && \
mv build-$(SO_TARGET)/libgomnivoicecpp.so ./$(SO_TARGET) (mv build-$(SO_TARGET)/libgomnivoicecpp.so ./$(SO_TARGET) 2>/dev/null || \
mv build-$(SO_TARGET)/libgomnivoicecpp.dylib ./$(SO_TARGET) 2>/dev/null)
test: omnivoice-cpp test: omnivoice-cpp
@echo "Running omnivoice-cpp tests..." @echo "Running omnivoice-cpp tests..."

View File

@@ -4,6 +4,7 @@ package main
import ( import (
"flag" "flag"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
func main() { func main() {
libName := os.Getenv("OMNIVOICE_LIBRARY") libName := os.Getenv("OMNIVOICE_LIBRARY")
if libName == "" { if libName == "" {
libName = "./libgomnivoicecpp-fallback.so" if runtime.GOOS == "darwin" {
libName = "./libgomnivoicecpp-fallback.dylib"
} else {
libName = "./libgomnivoicecpp-fallback.so"
}
} }
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
mkdir -p $CURDIR/package/lib mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/omnivoice-cpp $CURDIR/package/ cp -avf $CURDIR/omnivoice-cpp $CURDIR/package/
cp -fv $CURDIR/libgomnivoicecpp-*.so $CURDIR/package/ cp -fv $CURDIR/libgomnivoicecpp-*.so $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/libgomnivoicecpp-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/run.sh $CURDIR/package/ cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries # Detect architecture and copy appropriate libraries

View File

@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1 grep -e "flags" /proc/cpuinfo | head -1
fi fi
LIBRARY="$CURDIR/libgomnivoicecpp-fallback.so" if [ "$(uname)" = "Darwin" ]; then
# macOS: single dylib variant (Metal or Accelerate)
LIBRARY="$CURDIR/libgomnivoicecpp-fallback.dylib"
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
else
LIBRARY="$CURDIR/libgomnivoicecpp-fallback.so"
if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK" echo "CPU: AVX found OK"
if [ -e $CURDIR/libgomnivoicecpp-avx.so ]; then if [ -e $CURDIR/libgomnivoicecpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgomnivoicecpp-avx512.so" LIBRARY="$CURDIR/libgomnivoicecpp-avx512.so"
fi fi
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export OMNIVOICE_LIBRARY=$LIBRARY export OMNIVOICE_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it # If there is a lib/ld.so, use it

View File

@@ -74,6 +74,7 @@ libparakeet.so: sources/parakeet.cpp
cmake -B sources/parakeet.cpp/build-shared -S sources/parakeet.cpp $(CMAKE_ARGS) cmake -B sources/parakeet.cpp/build-shared -S sources/parakeet.cpp $(CMAKE_ARGS)
cmake --build sources/parakeet.cpp/build-shared --config Release -j$(JOBS) cmake --build sources/parakeet.cpp/build-shared --config Release -j$(JOBS)
cp -fv sources/parakeet.cpp/build-shared/libparakeet.so* ./ 2>/dev/null || true cp -fv sources/parakeet.cpp/build-shared/libparakeet.so* ./ 2>/dev/null || true
cp -fv sources/parakeet.cpp/build-shared/libparakeet.dylib ./ 2>/dev/null || true
cp -fv sources/parakeet.cpp/include/parakeet_capi.h ./ cp -fv sources/parakeet.cpp/include/parakeet_capi.h ./
parakeet-cpp-grpc: libparakeet.so main.go goparakeetcpp.go parakeet-cpp-grpc: libparakeet.so main.go goparakeetcpp.go

View File

@@ -2,15 +2,17 @@ package main
// Started internally by LocalAI - one gRPC server per loaded model. // Started internally by LocalAI - one gRPC server per loaded model.
// //
// Loads libparakeet.so via purego and registers the flat C-API entry // Loads the parakeet shared library via purego and registers the flat
// points declared in parakeet_capi.h. The library name can be overridden // C-API entry points declared in parakeet_capi.h. The library name can be
// with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY / VIBEVOICECPP_LIBRARY // overridden with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY /
// convention in the sibling backends); the default looks for the .so next // VIBEVOICECPP_LIBRARY convention in the sibling backends); the default
// to this binary. // looks next to this binary for libparakeet.so on Linux and
// libparakeet.dylib on macOS.
import ( import (
"flag" "flag"
"fmt" "fmt"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -28,7 +30,11 @@ type LibFuncs struct {
func main() { func main() {
libName := os.Getenv("PARAKEET_LIBRARY") libName := os.Getenv("PARAKEET_LIBRARY")
if libName == "" { if libName == "" {
libName = "libparakeet.so" if runtime.GOOS == "darwin" {
libName = "libparakeet.dylib"
} else {
libName = "libparakeet.so"
}
} }
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -16,12 +16,15 @@ mkdir -p "$CURDIR/package/lib"
cp -avf "$CURDIR/parakeet-cpp-grpc" "$CURDIR/package/" cp -avf "$CURDIR/parakeet-cpp-grpc" "$CURDIR/package/"
cp -avf "$CURDIR/run.sh" "$CURDIR/package/" cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
# libparakeet.so + any soname symlinks (libparakeet.so.X[.Y]). purego.Dlopen # libparakeet shared lib + any soname symlinks. On Linux this is
# resolves it via LD_LIBRARY_PATH, which run.sh points at lib/. # libparakeet.so[.X.Y]; on macOS it is libparakeet.dylib. purego.Dlopen
cp -avf "$CURDIR"/libparakeet.so* "$CURDIR/package/lib/" 2>/dev/null || { # resolves it via the *_LIBRARY_PATH that run.sh points at lib/.
echo "ERROR: libparakeet.so not found in $CURDIR, run 'make' first" >&2 cp -avf "$CURDIR"/libparakeet.so* "$CURDIR/package/lib/" 2>/dev/null || true
cp -avf "$CURDIR"/libparakeet.dylib "$CURDIR/package/lib/" 2>/dev/null || true
if ! ls "$CURDIR"/package/lib/libparakeet.* >/dev/null 2>&1; then
echo "ERROR: libparakeet shared library not found in $CURDIR, run 'make' first" >&2
exit 1 exit 1
} fi
# Detect architecture and copy the core runtime libs libparakeet.so links # Detect architecture and copy the core runtime libs libparakeet.so links
# against, plus the matching dynamic loader as lib/ld.so. # against, plus the matching dynamic loader as lib/ld.so.
@@ -48,7 +51,7 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 "$CURDIR/package/lib/librt.so.1" cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 "$CURDIR/package/lib/librt.so.1"
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 "$CURDIR/package/lib/libpthread.so.0" cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 "$CURDIR/package/lib/libpthread.so.0"
elif [ "$(uname -s)" = "Darwin" ]; then elif [ "$(uname -s)" = "Darwin" ]; then
echo "Detected Darwin" echo "Detected Darwin — system frameworks linked dynamically, no bundled libs needed"
else else
echo "Error: Could not detect architecture" echo "Error: Could not detect architecture"
exit 1 exit 1

View File

@@ -3,11 +3,17 @@ set -e
CURDIR=$(dirname "$(realpath "$0")") CURDIR=$(dirname "$(realpath "$0")")
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}" if [ "$(uname)" = "Darwin" ]; then
export DYLD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${DYLD_LIBRARY_PATH:-}"
export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.dylib"
else
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.so"
fi
# If a self-contained ld.so was packaged, route through it so the # If a self-contained ld.so was packaged, route through it so the
# packaged libc / libstdc++ are used instead of the host's (matches the # packaged libc / libstdc++ are used instead of the host's (matches the
# whisper backend's runtime layout). # whisper backend's runtime layout). Linux only.
if [ -f "$CURDIR/lib/ld.so" ]; then if [ -f "$CURDIR/lib/ld.so" ]; then
echo "Using lib/ld.so" echo "Using lib/ld.so"
exec "$CURDIR/lib/ld.so" "$CURDIR/parakeet-cpp-grpc" "$@" exec "$CURDIR/lib/ld.so" "$CURDIR/parakeet-cpp-grpc" "$@"

View File

@@ -65,8 +65,8 @@ UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux) ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libgoqwen3ttscpp-avx.so libgoqwen3ttscpp-avx2.so libgoqwen3ttscpp-avx512.so libgoqwen3ttscpp-fallback.so VARIANT_TARGETS = libgoqwen3ttscpp-avx.so libgoqwen3ttscpp-avx2.so libgoqwen3ttscpp-avx512.so libgoqwen3ttscpp-fallback.so
else else
# On non-Linux (e.g., Darwin), build only fallback variant # On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
VARIANT_TARGETS = libgoqwen3ttscpp-fallback.so VARIANT_TARGETS = libgoqwen3ttscpp-fallback.dylib
endif endif
qwen3-tts-cpp: main.go goqwen3ttscpp.go $(VARIANT_TARGETS) qwen3-tts-cpp: main.go goqwen3ttscpp.go $(VARIANT_TARGETS)
@@ -78,7 +78,7 @@ package: qwen3-tts-cpp
build: package build: package
clean: purge clean: purge
rm -rf libgoqwen3ttscpp*.so package sources/qwentts.cpp qwen3-tts-cpp rm -rf libgoqwen3ttscpp*.so libgoqwen3ttscpp*.dylib package sources/qwentts.cpp qwen3-tts-cpp
purge: purge:
rm -rf build* rm -rf build*
@@ -110,13 +110,20 @@ libgoqwen3ttscpp-fallback.so: sources/qwentts.cpp
SO_TARGET=libgoqwen3ttscpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom SO_TARGET=libgoqwen3ttscpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom
rm -rf build-libgoqwen3ttscpp-fallback.so rm -rf build-libgoqwen3ttscpp-fallback.so
# Build fallback variant as a dylib (Darwin)
libgoqwen3ttscpp-fallback.dylib: sources/qwentts.cpp
$(info ${GREEN}I qwen3-tts-cpp build info:fallback (dylib)${RESET})
SO_TARGET=libgoqwen3ttscpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom
rm -rf build-libgoqwen3ttscpp-fallback.dylib
libgoqwen3ttscpp-custom: CMakeLists.txt cpp/goqwen3ttscpp.cpp cpp/goqwen3ttscpp.h libgoqwen3ttscpp-custom: CMakeLists.txt cpp/goqwen3ttscpp.cpp cpp/goqwen3ttscpp.h
mkdir -p build-$(SO_TARGET) && \ mkdir -p build-$(SO_TARGET) && \
cd build-$(SO_TARGET) && \ cd build-$(SO_TARGET) && \
cmake .. $(CMAKE_ARGS) && \ cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) --target goqwen3ttscpp && \ cmake --build . --config Release -j$(JOBS) --target goqwen3ttscpp && \
cd .. && \ cd .. && \
mv build-$(SO_TARGET)/libgoqwen3ttscpp.so ./$(SO_TARGET) (mv build-$(SO_TARGET)/libgoqwen3ttscpp.so ./$(SO_TARGET) 2>/dev/null || \
mv build-$(SO_TARGET)/libgoqwen3ttscpp.dylib ./$(SO_TARGET) 2>/dev/null)
test: qwen3-tts-cpp test: qwen3-tts-cpp
@echo "Running qwen3-tts-cpp tests..." @echo "Running qwen3-tts-cpp tests..."

View File

@@ -4,6 +4,7 @@ package main
import ( import (
"flag" "flag"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
func main() { func main() {
libName := os.Getenv("QWEN3TTS_LIBRARY") libName := os.Getenv("QWEN3TTS_LIBRARY")
if libName == "" { if libName == "" {
libName = "./libgoqwen3ttscpp-fallback.so" if runtime.GOOS == "darwin" {
libName = "./libgoqwen3ttscpp-fallback.dylib"
} else {
libName = "./libgoqwen3ttscpp-fallback.so"
}
} }
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
mkdir -p $CURDIR/package/lib mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/qwen3-tts-cpp $CURDIR/package/ cp -avf $CURDIR/qwen3-tts-cpp $CURDIR/package/
cp -fv $CURDIR/libgoqwen3ttscpp-*.so $CURDIR/package/ cp -fv $CURDIR/libgoqwen3ttscpp-*.so $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/libgoqwen3ttscpp-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/run.sh $CURDIR/package/ cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries # Detect architecture and copy appropriate libraries

View File

@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1 grep -e "flags" /proc/cpuinfo | head -1
fi fi
LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.so" if [ "$(uname)" = "Darwin" ]; then
# macOS: single dylib variant (Metal or Accelerate)
LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.dylib"
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
else
LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.so"
if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK" echo "CPU: AVX found OK"
if [ -e $CURDIR/libgoqwen3ttscpp-avx.so ]; then if [ -e $CURDIR/libgoqwen3ttscpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgoqwen3ttscpp-avx512.so" LIBRARY="$CURDIR/libgoqwen3ttscpp-avx512.so"
fi fi
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export QWEN3TTS_LIBRARY=$LIBRARY export QWEN3TTS_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it # If there is a lib/ld.so, use it

View File

@@ -71,7 +71,7 @@ ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = librfdetrcpp-avx.so librfdetrcpp-avx2.so librfdetrcpp-avx512.so librfdetrcpp-fallback.so VARIANT_TARGETS = librfdetrcpp-avx.so librfdetrcpp-avx2.so librfdetrcpp-avx512.so librfdetrcpp-fallback.so
else else
# On non-Linux (e.g., Darwin), build only fallback variant # On non-Linux (e.g., Darwin), build only fallback variant
VARIANT_TARGETS = librfdetrcpp-fallback.so VARIANT_TARGETS = librfdetrcpp-fallback.dylib
endif endif
rfdetr-cpp: main.go gorfdetrcpp.go $(VARIANT_TARGETS) rfdetr-cpp: main.go gorfdetrcpp.go $(VARIANT_TARGETS)
@@ -83,7 +83,7 @@ package: rfdetr-cpp
build: package build: package
clean: purge clean: purge
rm -rf librfdetrcpp*.so rfdetr-cpp package sources rm -rf librfdetrcpp*.so librfdetrcpp*.dylib rfdetr-cpp package sources
purge: purge:
rm -rf build* rm -rf build*
@@ -110,11 +110,19 @@ librfdetrcpp-avx512.so: sources/rt-detr.cpp
endif endif
# Build fallback variant (all platforms) # Build fallback variant (all platforms)
ifeq ($(UNAME_S),Darwin)
librfdetrcpp-fallback.dylib: sources/rt-detr.cpp
rm -rfv build-$@
$(info ${GREEN}I rfdetr-cpp build info:fallback${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom
rm -rfv build-$@
else
librfdetrcpp-fallback.so: sources/rt-detr.cpp librfdetrcpp-fallback.so: sources/rt-detr.cpp
rm -rfv build-$@ rm -rfv build-$@
$(info ${GREEN}I rfdetr-cpp build info:fallback${RESET}) $(info ${GREEN}I rfdetr-cpp build info:fallback${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom
rm -rfv build-$@ rm -rfv build-$@
endif
librfdetrcpp-custom: CMakeLists.txt librfdetrcpp-custom: CMakeLists.txt
mkdir -p build-$(SO_TARGET) && \ mkdir -p build-$(SO_TARGET) && \
@@ -122,7 +130,8 @@ librfdetrcpp-custom: CMakeLists.txt
cmake .. $(CMAKE_ARGS) && \ cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \ cmake --build . --config Release -j$(JOBS) && \
cd .. && \ cd .. && \
mv build-$(SO_TARGET)/librfdetrcpp.so ./$(SO_TARGET) (mv build-$(SO_TARGET)/librfdetrcpp.so ./$(SO_TARGET) 2>/dev/null || \
mv build-$(SO_TARGET)/librfdetrcpp.dylib ./$(SO_TARGET) 2>/dev/null)
all: rfdetr-cpp package all: rfdetr-cpp package

View File

@@ -9,6 +9,7 @@ package main
import ( import (
"flag" "flag"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -27,7 +28,11 @@ func main() {
// Get library name from environment variable, default to fallback // Get library name from environment variable, default to fallback
libName := os.Getenv("RFDETR_LIBRARY") libName := os.Getenv("RFDETR_LIBRARY")
if libName == "" { if libName == "" {
libName = "./librfdetrcpp-fallback.so" if runtime.GOOS == "darwin" {
libName = "./librfdetrcpp-fallback.dylib"
} else {
libName = "./librfdetrcpp-fallback.so"
}
} }
rfdetrLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) rfdetrLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
# Create lib directory # Create lib directory
mkdir -p $CURDIR/package/lib mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/librfdetrcpp-*.so $CURDIR/package/ cp -fv $CURDIR/librfdetrcpp-*.so $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/librfdetrcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -avf $CURDIR/rfdetr-cpp $CURDIR/package/ cp -avf $CURDIR/rfdetr-cpp $CURDIR/package/
cp -fv $CURDIR/run.sh $CURDIR/package/ cp -fv $CURDIR/run.sh $CURDIR/package/

View File

@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1 grep -e "flags" /proc/cpuinfo | head -1
fi fi
LIBRARY="$CURDIR/librfdetrcpp-fallback.so" if [ "$(uname)" = "Darwin" ]; then
# macOS: single dylib variant (Metal or Accelerate)
LIBRARY="$CURDIR/librfdetrcpp-fallback.dylib"
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
else
LIBRARY="$CURDIR/librfdetrcpp-fallback.so"
if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK" echo "CPU: AVX found OK"
if [ -e $CURDIR/librfdetrcpp-avx.so ]; then if [ -e $CURDIR/librfdetrcpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/librfdetrcpp-avx512.so" LIBRARY="$CURDIR/librfdetrcpp-avx512.so"
fi fi
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export RFDETR_LIBRARY=$LIBRARY export RFDETR_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it # If there is a lib/ld.so, use it

View File

@@ -66,7 +66,7 @@ ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libgosam3-avx.so libgosam3-avx2.so libgosam3-avx512.so libgosam3-fallback.so VARIANT_TARGETS = libgosam3-avx.so libgosam3-avx2.so libgosam3-avx512.so libgosam3-fallback.so
else else
# On non-Linux (e.g., Darwin), build only fallback variant # On non-Linux (e.g., Darwin), build only fallback variant
VARIANT_TARGETS = libgosam3-fallback.so VARIANT_TARGETS = libgosam3-fallback.dylib
endif endif
sam3-cpp: main.go gosam3.go $(VARIANT_TARGETS) sam3-cpp: main.go gosam3.go $(VARIANT_TARGETS)
@@ -78,7 +78,7 @@ package: sam3-cpp
build: package build: package
clean: purge clean: purge
rm -rf libgosam3*.so sam3-cpp package sources rm -rf libgosam3*.so libgosam3*.dylib sam3-cpp package sources
purge: purge:
rm -rf build* rm -rf build*
@@ -105,11 +105,19 @@ libgosam3-avx512.so: sources/sam3.cpp
endif endif
# Build fallback variant (all platforms) # Build fallback variant (all platforms)
ifeq ($(UNAME_S),Darwin)
libgosam3-fallback.dylib: sources/sam3.cpp
$(MAKE) purge
$(info ${GREEN}I sam3-cpp build info:fallback${RESET})
SO_TARGET=libgosam3-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
rm -rfv build*
else
libgosam3-fallback.so: sources/sam3.cpp libgosam3-fallback.so: sources/sam3.cpp
$(MAKE) purge $(MAKE) purge
$(info ${GREEN}I sam3-cpp build info:fallback${RESET}) $(info ${GREEN}I sam3-cpp build info:fallback${RESET})
SO_TARGET=libgosam3-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom SO_TARGET=libgosam3-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
rm -rfv build* rm -rfv build*
endif
libgosam3-custom: CMakeLists.txt cpp/gosam3.cpp cpp/gosam3.h libgosam3-custom: CMakeLists.txt cpp/gosam3.cpp cpp/gosam3.h
mkdir -p build-$(SO_TARGET) && \ mkdir -p build-$(SO_TARGET) && \
@@ -117,6 +125,7 @@ libgosam3-custom: CMakeLists.txt cpp/gosam3.cpp cpp/gosam3.h
cmake .. $(CMAKE_ARGS) && \ cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \ cmake --build . --config Release -j$(JOBS) && \
cd .. && \ cd .. && \
mv build-$(SO_TARGET)/libgosam3.so ./$(SO_TARGET) (mv build-$(SO_TARGET)/libgosam3.so ./$(SO_TARGET) 2>/dev/null || \
mv build-$(SO_TARGET)/libgosam3.dylib ./$(SO_TARGET) 2>/dev/null)
all: sam3-cpp package all: sam3-cpp package

View File

@@ -3,6 +3,7 @@ package main
import ( import (
"flag" "flag"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ func main() {
// Get library name from environment variable, default to fallback // Get library name from environment variable, default to fallback
libName := os.Getenv("SAM3_LIBRARY") libName := os.Getenv("SAM3_LIBRARY")
if libName == "" { if libName == "" {
libName = "./libgosam3-fallback.so" if runtime.GOOS == "darwin" {
libName = "./libgosam3-fallback.dylib"
} else {
libName = "./libgosam3-fallback.so"
}
} }
gosamLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) gosamLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
# Create lib directory # Create lib directory
mkdir -p $CURDIR/package/lib mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/libgosam3-*.so $CURDIR/package/ cp -fv $CURDIR/libgosam3-*.so $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/libgosam3-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -avf $CURDIR/sam3-cpp $CURDIR/package/ cp -avf $CURDIR/sam3-cpp $CURDIR/package/
cp -fv $CURDIR/run.sh $CURDIR/package/ cp -fv $CURDIR/run.sh $CURDIR/package/

View File

@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1 grep -e "flags" /proc/cpuinfo | head -1
fi fi
LIBRARY="$CURDIR/libgosam3-fallback.so" if [ "$(uname)" = "Darwin" ]; then
# macOS: single dylib variant (Metal or Accelerate)
LIBRARY="$CURDIR/libgosam3-fallback.dylib"
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
else
LIBRARY="$CURDIR/libgosam3-fallback.so"
if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK" echo "CPU: AVX found OK"
if [ -e $CURDIR/libgosam3-avx.so ]; then if [ -e $CURDIR/libgosam3-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgosam3-avx512.so" LIBRARY="$CURDIR/libgosam3-avx512.so"
fi fi
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export SAM3_LIBRARY=$LIBRARY export SAM3_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it # If there is a lib/ld.so, use it

View File

@@ -7,6 +7,7 @@ import (
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"runtime"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
@@ -238,11 +239,19 @@ func loadSherpaLibs() error {
func loadSherpaLibsOnce() error { func loadSherpaLibsOnce() error {
shimLib := os.Getenv("SHERPA_SHIM_LIBRARY") shimLib := os.Getenv("SHERPA_SHIM_LIBRARY")
if shimLib == "" { if shimLib == "" {
shimLib = "libsherpa-shim.so" if runtime.GOOS == "darwin" {
shimLib = "libsherpa-shim.dylib"
} else {
shimLib = "libsherpa-shim.so"
}
} }
capiLib := os.Getenv("SHERPA_ONNX_LIBRARY") capiLib := os.Getenv("SHERPA_ONNX_LIBRARY")
if capiLib == "" { if capiLib == "" {
capiLib = "libsherpa-onnx-c-api.so" if runtime.GOOS == "darwin" {
capiLib = "libsherpa-onnx-c-api.dylib"
} else {
capiLib = "libsherpa-onnx-c-api.so"
}
} }
shim, err := purego.Dlopen(shimLib, purego.RTLD_NOW|purego.RTLD_GLOBAL) shim, err := purego.Dlopen(shimLib, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -3,7 +3,13 @@ set -ex
CURDIR=$(dirname "$(realpath $0)") CURDIR=$(dirname "$(realpath $0)")
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH if [ "$(uname)" = "Darwin" ]; then
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
export SHERPA_SHIM_LIBRARY=$CURDIR/lib/libsherpa-shim.dylib
export SHERPA_ONNX_LIBRARY=$CURDIR/lib/libsherpa-onnx-c-api.dylib
else
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
if [ -f $CURDIR/lib/ld.so ]; then if [ -f $CURDIR/lib/ld.so ]; then
echo "Using lib/ld.so" echo "Using lib/ld.so"

View File

@@ -131,6 +131,7 @@ libgosd-custom: CMakeLists.txt cpp/gosd.cpp cpp/gosd.h
cmake .. $(CMAKE_ARGS) && \ cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \ cmake --build . --config Release -j$(JOBS) && \
cd .. && \ cd .. && \
mv build-$(SO_TARGET)/libgosd.so ./$(SO_TARGET) (mv build-$(SO_TARGET)/libgosd.so ./$(SO_TARGET) 2>/dev/null || \
mv build-$(SO_TARGET)/libgosd.dylib ./$(SO_TARGET) 2>/dev/null)
all: stablediffusion-ggml package all: stablediffusion-ggml package

View File

@@ -3,6 +3,7 @@ package main
import ( import (
"flag" "flag"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ func main() {
// Get library name from environment variable, default to fallback // Get library name from environment variable, default to fallback
libName := os.Getenv("SD_LIBRARY") libName := os.Getenv("SD_LIBRARY")
if libName == "" { if libName == "" {
libName = "./libgosd-fallback.so" if runtime.GOOS == "darwin" {
libName = "./libgosd-fallback.dylib"
} else {
libName = "./libgosd-fallback.so"
}
} }
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -12,6 +12,7 @@ REPO_ROOT="${CURDIR}/../../.."
mkdir -p $CURDIR/package/lib mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/libgosd-*.so $CURDIR/package/ cp -avf $CURDIR/libgosd-*.so $CURDIR/package/
cp -fv $CURDIR/libgosd-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -avf $CURDIR/stablediffusion-ggml $CURDIR/package/ cp -avf $CURDIR/stablediffusion-ggml $CURDIR/package/
cp -fv $CURDIR/run.sh $CURDIR/package/ cp -fv $CURDIR/run.sh $CURDIR/package/

View File

@@ -12,9 +12,18 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1 grep -e "flags" /proc/cpuinfo | head -1
fi fi
LIBRARY="$CURDIR/libgosd-fallback.so" if [ "$(uname)" = "Darwin" ]; then
# macOS: single library variant (Metal or Accelerate). The gosd target is
# built as a CMake MODULE, which emits a .dylib for a SHARED build but a
# .so for a MODULE build on Apple, so prefer .dylib and fall back to .so.
LIBRARY="$CURDIR/libgosd-fallback.dylib"
if [ ! -e "$LIBRARY" ]; then
LIBRARY="$CURDIR/libgosd-fallback.so"
fi
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
else
LIBRARY="$CURDIR/libgosd-fallback.so"
if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK" echo "CPU: AVX found OK"
if [ -e $CURDIR/libgosd-avx.so ]; then if [ -e $CURDIR/libgosd-avx.so ]; then
@@ -36,9 +45,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgosd-avx512.so" LIBRARY="$CURDIR/libgosd-avx512.so"
fi fi
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export SD_LIBRARY=$LIBRARY export SD_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it # If there is a lib/ld.so, use it

View File

@@ -16,7 +16,6 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"regexp" "regexp"
"runtime"
"strings" "strings"
"time" "time"
"unicode" "unicode"
@@ -944,13 +943,7 @@ func InitializeONNXRuntime() error {
} }
} }
if libPath == "" { if libPath == "" {
// LocalAI: default to the platform-native shared library libPath = "/usr/local/lib/libonnxruntime.so"
// extension when nothing else is found (dyld vs ld.so).
if runtime.GOOS == "darwin" {
libPath = "/usr/local/lib/libonnxruntime.dylib"
} else {
libPath = "/usr/local/lib/libonnxruntime.so"
}
} }
} }
ort.SetSharedLibraryPath(libPath) ort.SetSharedLibraryPath(libPath)

View File

@@ -32,10 +32,6 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2 cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1 cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0 cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
elif [ $(uname -s) = "Darwin" ]; then
# macOS: dyld resolves the bundled .dylib via DYLD_LIBRARY_PATH (set in
# run.sh); there is no ld.so loader nor glibc to bundle.
echo "Detected Darwin"
else else
echo "Error: Could not detect architecture" echo "Error: Could not detect architecture"
exit 1 exit 1

View File

@@ -3,19 +3,12 @@ set -ex
CURDIR=$(dirname "$(realpath $0)") CURDIR=$(dirname "$(realpath $0)")
if [ "$(uname)" = "Darwin" ]; then export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
# macOS uses dyld: there is no ld.so loader, and the search path env export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so
# var is DYLD_LIBRARY_PATH. ONNX Runtime ships as a .dylib here.
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.dylib
else
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so
if [ -f $CURDIR/lib/ld.so ]; then if [ -f $CURDIR/lib/ld.so ]; then
echo "Using lib/ld.so" echo "Using lib/ld.so"
exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@" exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@"
fi
fi fi
exec $CURDIR/supertonic "$@" exec $CURDIR/supertonic "$@"

View File

@@ -70,8 +70,8 @@ UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux) ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libgovibevoicecpp-avx.so libgovibevoicecpp-avx2.so libgovibevoicecpp-avx512.so libgovibevoicecpp-fallback.so VARIANT_TARGETS = libgovibevoicecpp-avx.so libgovibevoicecpp-avx2.so libgovibevoicecpp-avx512.so libgovibevoicecpp-fallback.so
else else
# On non-Linux (e.g., Darwin), build only fallback variant # On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
VARIANT_TARGETS = libgovibevoicecpp-fallback.so VARIANT_TARGETS = libgovibevoicecpp-fallback.dylib
endif endif
vibevoice-cpp: main.go govibevoicecpp.go $(VARIANT_TARGETS) vibevoice-cpp: main.go govibevoicecpp.go $(VARIANT_TARGETS)
@@ -83,7 +83,7 @@ package: vibevoice-cpp
build: package build: package
clean: purge clean: purge
rm -rf libgovibevoicecpp*.so package sources/vibevoice.cpp vibevoice-cpp rm -rf libgovibevoicecpp*.so libgovibevoicecpp*.dylib package sources/vibevoice.cpp vibevoice-cpp
purge: purge:
rm -rf build* rm -rf build*
@@ -119,13 +119,21 @@ libgovibevoicecpp-fallback.so: sources/vibevoice.cpp
SO_TARGET=libgovibevoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgovibevoicecpp-custom SO_TARGET=libgovibevoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgovibevoicecpp-custom
rm -rfv build* rm -rfv build*
# Build fallback variant as a dylib (Darwin)
libgovibevoicecpp-fallback.dylib: sources/vibevoice.cpp
$(MAKE) purge
$(info ${GREEN}I vibevoice-cpp build info:fallback (dylib)${RESET})
SO_TARGET=libgovibevoicecpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgovibevoicecpp-custom
rm -rfv build*
libgovibevoicecpp-custom: CMakeLists.txt cpp/govibevoicecpp.cpp cpp/govibevoicecpp.h libgovibevoicecpp-custom: CMakeLists.txt cpp/govibevoicecpp.cpp cpp/govibevoicecpp.h
mkdir -p build-$(SO_TARGET) && \ mkdir -p build-$(SO_TARGET) && \
cd build-$(SO_TARGET) && \ cd build-$(SO_TARGET) && \
cmake .. $(CMAKE_ARGS) && \ cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) --target govibevoicecpp && \ cmake --build . --config Release -j$(JOBS) --target govibevoicecpp && \
cd .. && \ cd .. && \
mv build-$(SO_TARGET)/libgovibevoicecpp.so ./$(SO_TARGET) (mv build-$(SO_TARGET)/libgovibevoicecpp.so ./$(SO_TARGET) 2>/dev/null || \
mv build-$(SO_TARGET)/libgovibevoicecpp.dylib ./$(SO_TARGET) 2>/dev/null)
test: vibevoice-cpp test: vibevoice-cpp
@echo "Running vibevoice-cpp tests..." @echo "Running vibevoice-cpp tests..."

View File

@@ -4,6 +4,7 @@ package main
import ( import (
"flag" "flag"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
func main() { func main() {
libName := os.Getenv("VIBEVOICECPP_LIBRARY") libName := os.Getenv("VIBEVOICECPP_LIBRARY")
if libName == "" { if libName == "" {
libName = "./libgovibevoicecpp-fallback.so" if runtime.GOOS == "darwin" {
libName = "./libgovibevoicecpp-fallback.dylib"
} else {
libName = "./libgovibevoicecpp-fallback.so"
}
} }
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
mkdir -p $CURDIR/package/lib mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/vibevoice-cpp $CURDIR/package/ cp -avf $CURDIR/vibevoice-cpp $CURDIR/package/
cp -fv $CURDIR/libgovibevoicecpp-*.so $CURDIR/package/ cp -fv $CURDIR/libgovibevoicecpp-*.so $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/libgovibevoicecpp-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/run.sh $CURDIR/package/ cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries # Detect architecture and copy appropriate libraries

View File

@@ -11,9 +11,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1 grep -e "flags" /proc/cpuinfo | head -1
fi fi
LIBRARY="$CURDIR/libgovibevoicecpp-fallback.so" if [ "$(uname)" = "Darwin" ]; then
# macOS: single dylib variant (Metal or Accelerate)
LIBRARY="$CURDIR/libgovibevoicecpp-fallback.dylib"
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
else
LIBRARY="$CURDIR/libgovibevoicecpp-fallback.so"
if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK" echo "CPU: AVX found OK"
if [ -e $CURDIR/libgovibevoicecpp-avx.so ]; then if [ -e $CURDIR/libgovibevoicecpp-avx.so ]; then
@@ -34,9 +38,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgovibevoicecpp-avx512.so" LIBRARY="$CURDIR/libgovibevoicecpp-avx512.so"
fi fi
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export VIBEVOICECPP_LIBRARY=$LIBRARY export VIBEVOICECPP_LIBRARY=$LIBRARY
if [ -f $CURDIR/lib/ld.so ]; then if [ -f $CURDIR/lib/ld.so ]; then

View File

@@ -117,6 +117,7 @@ libgowhisper-custom: CMakeLists.txt cpp/gowhisper.cpp cpp/gowhisper.h
cmake .. $(CMAKE_ARGS) && \ cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \ cmake --build . --config Release -j$(JOBS) && \
cd .. && \ cd .. && \
mv build-$(SO_TARGET)/libgowhisper.so ./$(SO_TARGET) mv build-$(SO_TARGET)/libgowhisper.so ./$(SO_TARGET) 2>/dev/null || \
mv build-$(SO_TARGET)/libgowhisper.dylib ./$(SO_TARGET:.so=.dylib)
all: whisper package all: whisper package

View File

@@ -4,6 +4,7 @@ package main
import ( import (
"flag" "flag"
"os" "os"
"runtime"
"github.com/ebitengine/purego" "github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -22,7 +23,11 @@ func main() {
// Get library name from environment variable, default to fallback // Get library name from environment variable, default to fallback
libName := os.Getenv("WHISPER_LIBRARY") libName := os.Getenv("WHISPER_LIBRARY")
if libName == "" { if libName == "" {
libName = "./libgowhisper-fallback.so" if runtime.GOOS == "darwin" {
libName = "./libgowhisper-fallback.dylib"
} else {
libName = "./libgowhisper-fallback.so"
}
} }
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL) gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)

View File

@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
mkdir -p $CURDIR/package/lib mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/whisper $CURDIR/package/ cp -avf $CURDIR/whisper $CURDIR/package/
cp -fv $CURDIR/libgowhisper-*.so $CURDIR/package/ cp -fv $CURDIR/libgowhisper-*.so $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/libgowhisper-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/run.sh $CURDIR/package/ cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries # Detect architecture and copy appropriate libraries

View File

@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1 grep -e "flags" /proc/cpuinfo | head -1
fi fi
LIBRARY="$CURDIR/libgowhisper-fallback.so" if [ "$(uname)" = "Darwin" ]; then
# macOS: single dylib variant (Metal or Accelerate)
LIBRARY="$CURDIR/libgowhisper-fallback.dylib"
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
else
LIBRARY="$CURDIR/libgowhisper-fallback.so"
if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK" echo "CPU: AVX found OK"
if [ -e $CURDIR/libgowhisper-avx.so ]; then if [ -e $CURDIR/libgowhisper-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgowhisper-avx512.so" LIBRARY="$CURDIR/libgowhisper-avx512.so"
fi fi
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi fi
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export WHISPER_LIBRARY=$LIBRARY export WHISPER_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it # If there is a lib/ld.so, use it

View File

@@ -1284,7 +1284,6 @@
nvidia-cuda-13: "cuda13-liquid-audio" nvidia-cuda-13: "cuda13-liquid-audio"
nvidia-cuda-12: "cuda12-liquid-audio" nvidia-cuda-12: "cuda12-liquid-audio"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio" nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio"
metal: "metal-liquid-audio"
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/7_6D7rWrLxp2hb6OHSV1p.png icon: https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/7_6D7rWrLxp2hb6OHSV1p.png
- &qwen-tts - &qwen-tts
urls: urls:
@@ -1570,7 +1569,6 @@
- TTS - TTS
capabilities: capabilities:
default: "cpu-supertonic" default: "cpu-supertonic"
metal: "metal-supertonic"
- !!merge <<: *neutts - !!merge <<: *neutts
name: "neutts-development" name: "neutts-development"
capabilities: capabilities:
@@ -4614,7 +4612,6 @@
nvidia-cuda-13: "cuda13-liquid-audio-development" nvidia-cuda-13: "cuda13-liquid-audio-development"
nvidia-cuda-12: "cuda12-liquid-audio-development" nvidia-cuda-12: "cuda12-liquid-audio-development"
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio-development" nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio-development"
metal: "metal-liquid-audio-development"
- !!merge <<: *liquid-audio - !!merge <<: *liquid-audio
name: "cpu-liquid-audio" name: "cpu-liquid-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-liquid-audio" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-liquid-audio"
@@ -4625,16 +4622,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-liquid-audio" uri: "quay.io/go-skynet/local-ai-backends:master-cpu-liquid-audio"
mirrors: mirrors:
- localai/localai-backends:master-cpu-liquid-audio - localai/localai-backends:master-cpu-liquid-audio
- !!merge <<: *liquid-audio
name: "metal-liquid-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-liquid-audio"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-liquid-audio
- !!merge <<: *liquid-audio
name: "metal-liquid-audio-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-liquid-audio"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-liquid-audio
- !!merge <<: *liquid-audio - !!merge <<: *liquid-audio
name: "cuda12-liquid-audio" name: "cuda12-liquid-audio"
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-liquid-audio" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-liquid-audio"
@@ -5295,7 +5282,6 @@
nvidia: "cuda12-trl" nvidia: "cuda12-trl"
nvidia-cuda-12: "cuda12-trl" nvidia-cuda-12: "cuda12-trl"
nvidia-cuda-13: "cuda13-trl" nvidia-cuda-13: "cuda13-trl"
metal: "metal-trl"
## TRL backend images ## TRL backend images
- !!merge <<: *trl - !!merge <<: *trl
name: "cpu-trl" name: "cpu-trl"
@@ -5327,16 +5313,6 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-trl" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-trl"
mirrors: mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-trl - localai/localai-backends:master-gpu-nvidia-cuda-13-trl
- !!merge <<: *trl
name: "metal-trl"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-trl"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-trl
- !!merge <<: *trl
name: "metal-trl-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-trl"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-trl
## llama.cpp quantization backend ## llama.cpp quantization backend
- &llama-cpp-quantization - &llama-cpp-quantization
name: "llama-cpp-quantization" name: "llama-cpp-quantization"
@@ -5508,7 +5484,6 @@
name: "supertonic-development" name: "supertonic-development"
capabilities: capabilities:
default: "cpu-supertonic-development" default: "cpu-supertonic-development"
metal: "metal-supertonic-development"
- !!merge <<: *supertonic - !!merge <<: *supertonic
name: "cpu-supertonic" name: "cpu-supertonic"
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-supertonic" uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-supertonic"
@@ -5519,13 +5494,3 @@
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-supertonic" uri: "quay.io/go-skynet/local-ai-backends:master-cpu-supertonic"
mirrors: mirrors:
- localai/localai-backends:master-cpu-supertonic - localai/localai-backends:master-cpu-supertonic
- !!merge <<: *supertonic
name: "metal-supertonic"
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-supertonic"
mirrors:
- localai/localai-backends:latest-metal-darwin-arm64-supertonic
- !!merge <<: *supertonic
name: "metal-supertonic-development"
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-supertonic"
mirrors:
- localai/localai-backends:master-metal-darwin-arm64-supertonic

View File

@@ -14,11 +14,5 @@ else
fi fi
# liquid-audio's torch wheels are large; allow upgrades to satisfy transitive pins # liquid-audio's torch wheels are large; allow upgrades to satisfy transitive pins
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade" EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
# it on the uv path; Linux/CUDA resolution is unchanged.
if [ "x${USE_PIP:-}" != "xtrue" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
fi
installRequirements installRequirements

View File

@@ -1,4 +1,3 @@
# MPS (Apple Silicon / Metal) build profile - installed by the darwin CI job.
torch>=2.8.0 torch>=2.8.0
torchaudio>=2.8.0 torchaudio>=2.8.0
torchcodec>=0.9.1 torchcodec>=0.9.1

View File

@@ -8,13 +8,7 @@ else
source $backend_dir/../common/libbackend.sh source $backend_dir/../common/libbackend.sh
fi fi
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade" EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
# it when uv is the installer, keeping the Linux/CUDA resolution unchanged.
if [ "x${USE_PIP:-}" != "xtrue" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
fi
installRequirements installRequirements
# Fetch convert_hf_to_gguf.py and gguf package from the same llama.cpp version # Fetch convert_hf_to_gguf.py and gguf package from the same llama.cpp version

View File

@@ -1,12 +0,0 @@
torch==2.10.0
trl
peft
datasets>=3.0.0
transformers>=4.56.2
accelerate>=1.4.0
huggingface-hub>=1.3.0
sentencepiece
# Note: bitsandbytes is intentionally omitted on MPS. It is only used by the
# CUDA (cublas) variants for 8-bit/4-bit quantization and has poor support on
# Apple Silicon. torch here uses the plain PyPI wheels, which ship MPS support
# on macOS arm64.

View File

@@ -54,35 +54,8 @@ func (g GPU) IsNVIDIABlackwell() bool {
return maj >= 12 return maj >= 12
} }
// Compute-buffer headroom guard for the raised physical batch.
//
// Raising n_ubatch grows the CUDA *compute buffer* (the scratch for the forward
// graph), which is allocated PER DEVICE — it does not benefit from a second GPU
// the way weights or KV (which are split across devices) do. The buffer scales
// ~linearly with n_ubatch * n_ctx, so a large context turns the GB10-tuned
// ub2048 into multi-GiB of extra scratch that must fit on a SINGLE card. On a
// 16 GiB consumer Blackwell with a 200k context that overflows (issue #10485),
// even though the GB10 it was measured on (128 GiB unified memory) had room.
//
// These constants size a conservative guard: only raise the batch when the
// extra scratch fits the per-device VRAM ceiling.
const (
// computeBufferBytesPerCell approximates the CUDA compute-buffer cost of one
// (n_ubatch * n_ctx) cell. Derived from an observed allocation (ub2048 *
// ctx204800 ~= 4.5 GiB => ~11 B/cell) and rounded up to 16 for margin, since
// the real cost also grows with model width (heads / embedding dim) which we
// don't know at config time.
computeBufferBytesPerCell = 16
// blackwellBatchHeadroomDivisor caps the extra compute buffer from raising the
// physical batch at VRAM/divisor. /4 keeps the bulk of a device for weights +
// KV, which already dominate VRAM use.
blackwellBatchHeadroomDivisor = 4
)
// PhysicalBatch returns the canonical physical batch (n_batch/n_ubatch) for the // PhysicalBatch returns the canonical physical batch (n_batch/n_ubatch) for the
// given hardware class, ignoring context/VRAM headroom. Use // given hardware, used when the model config leaves batch unset.
// PhysicalBatchForContext when a model context and per-device VRAM are known
// (the load paths) so the raised batch can't overflow a single device.
func PhysicalBatch(g GPU) int { func PhysicalBatch(g GPU) int {
if g.IsNVIDIABlackwell() { if g.IsNVIDIABlackwell() {
return BlackwellPhysicalBatch return BlackwellPhysicalBatch
@@ -90,32 +63,6 @@ func PhysicalBatch(g GPU) int {
return DefaultPhysicalBatch return DefaultPhysicalBatch
} }
// PhysicalBatchForContext is PhysicalBatch gated on per-device VRAM headroom for
// the given context: it only raises the batch above the conservative default
// when the extra compute buffer (which is allocated on a single device and grows
// with n_ubatch * n_ctx) fits within blackwellBatchHeadroomDivisor of the GPU's
// VRAM. g.VRAM must be the PER-DEVICE ceiling (the smallest device on a
// multi-GPU host), not the summed total — the compute buffer can't be split.
//
// VRAM 0 (unknown) stays conservative rather than risk a per-device OOM; the
// GB10 / unified-memory path reports system RAM, so it still clears the guard.
func PhysicalBatchForContext(g GPU, ctx int) int {
if !g.IsNVIDIABlackwell() {
return DefaultPhysicalBatch
}
if ctx <= 0 {
ctx = DefaultContextSize
}
if g.VRAM == 0 {
return DefaultPhysicalBatch
}
extra := uint64(ctx) * uint64(BlackwellPhysicalBatch-DefaultPhysicalBatch) * computeBufferBytesPerCell
if extra <= g.VRAM/blackwellBatchHeadroomDivisor {
return BlackwellPhysicalBatch
}
return DefaultPhysicalBatch
}
// IsManagedPhysicalBatch reports whether n is a value PhysicalBatch assigns. // IsManagedPhysicalBatch reports whether n is a value PhysicalBatch assigns.
// Callers that re-tune a value chosen by an upstream host (the distributed // Callers that re-tune a value chosen by an upstream host (the distributed
// router correcting the frontend's guess) use this to avoid clobbering an // router correcting the frontend's guess) use this to avoid clobbering an
@@ -175,12 +122,7 @@ func hasParallelOption(opts []string) bool {
// deterministic device — detection does a live nvidia-smi call. // deterministic device — detection does a live nvidia-smi call.
var localGPU = func() GPU { var localGPU = func() GPU {
vendor, _ := xsysinfo.DetectGPUVendor() vendor, _ := xsysinfo.DetectGPUVendor()
// Use the SMALLEST device's VRAM, not the summed total: the parallel-slot vram, _ := xsysinfo.TotalAvailableVRAM()
// tier and the batch headroom guard both reason about what fits on a single
// card, and per-device compute buffers can't be split across GPUs. Summing
// two 16 GiB cards into "32 GiB" is what over-provisioned multi-GPU hosts
// into OOM (issue #10485).
vram, _ := xsysinfo.MinPerGPUVRAM()
return GPU{ return GPU{
Vendor: vendor, Vendor: vendor,
ComputeCapability: xsysinfo.NVIDIAComputeCapability(), ComputeCapability: xsysinfo.NVIDIAComputeCapability(),
@@ -195,20 +137,10 @@ func ApplyHardwareDefaults(cfg *ModelConfig, gpu GPU) {
if cfg == nil { if cfg == nil {
return return
} }
// Raise the physical batch on Blackwell only when the resulting compute if cfg.Batch == 0 && gpu.IsNVIDIABlackwell() {
// buffer fits the per-device VRAM at THIS model's context. Leaving Batch at 0 cfg.Batch = BlackwellPhysicalBatch
// (rather than writing the default 512) preserves the downstream single-pass xlog.Debug("[hardware_defaults] Blackwell GPU: defaulting physical batch",
// sizing in core/backend.EffectiveBatchSize for embedding/score/rerank. "batch", cfg.Batch, "compute_cap", gpu.ComputeCapability)
if cfg.Batch == 0 {
ctx := DefaultContextSize
if cfg.ContextSize != nil {
ctx = *cfg.ContextSize
}
if PhysicalBatchForContext(gpu, ctx) == BlackwellPhysicalBatch {
cfg.Batch = BlackwellPhysicalBatch
xlog.Debug("[hardware_defaults] Blackwell GPU: defaulting physical batch",
"batch", cfg.Batch, "compute_cap", gpu.ComputeCapability, "context", ctx, "vram_gib", gpu.VRAM>>30)
}
} }
// Enable concurrent serving by default on a capable GPU: without this the // Enable concurrent serving by default on a capable GPU: without this the

View File

@@ -9,37 +9,26 @@ import (
// GPU. The detection seam (localGPU) is injected so the path is deterministic // GPU. The detection seam (localGPU) is injected so the path is deterministic
// without a real GPU. // without a real GPU.
var _ = Describe("SetDefaults hardware defaults (single-instance)", func() { var _ = Describe("SetDefaults hardware defaults (single-instance)", func() {
const gib = uint64(1) << 30
var orig func() GPU var orig func() GPU
BeforeEach(func() { orig = localGPU }) BeforeEach(func() { orig = localGPU })
AfterEach(func() { localGPU = orig }) AfterEach(func() { localGPU = orig })
It("sets the physical batch on a local Blackwell GPU with headroom", func() { It("sets the physical batch on a local Blackwell GPU", func() {
localGPU = func() GPU { return GPU{ComputeCapability: "12.1", VRAM: 119 * gib} } localGPU = func() GPU { return GPU{ComputeCapability: "12.1"} }
cfg := &ModelConfig{} cfg := &ModelConfig{}
cfg.SetDefaults() cfg.SetDefaults()
Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch)) Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
}) })
It("leaves batch unset when a large context would overflow the device", func() {
// Regression guard for issue #10485: 16 GiB consumer Blackwell + ~200k ctx.
localGPU = func() GPU { return GPU{ComputeCapability: "12.0", VRAM: 16 * gib} }
ctx := 204800
cfg := &ModelConfig{LLMConfig: LLMConfig{ContextSize: &ctx}}
cfg.SetDefaults()
Expect(cfg.Batch).To(Equal(0))
})
It("leaves batch unset on a non-Blackwell local GPU", func() { It("leaves batch unset on a non-Blackwell local GPU", func() {
localGPU = func() GPU { return GPU{ComputeCapability: "8.9", VRAM: 119 * gib} } localGPU = func() GPU { return GPU{ComputeCapability: "8.9"} }
cfg := &ModelConfig{} cfg := &ModelConfig{}
cfg.SetDefaults() cfg.SetDefaults()
Expect(cfg.Batch).To(Equal(0)) Expect(cfg.Batch).To(Equal(0))
}) })
It("never overrides an explicit batch", func() { It("never overrides an explicit batch", func() {
localGPU = func() GPU { return GPU{ComputeCapability: "12.1", VRAM: 119 * gib} } localGPU = func() GPU { return GPU{ComputeCapability: "12.1"} }
cfg := &ModelConfig{} cfg := &ModelConfig{}
cfg.Batch = 1024 cfg.Batch = 1024
cfg.SetDefaults() cfg.SetDefaults()

View File

@@ -7,8 +7,6 @@ import (
) )
var _ = Describe("Hardware-driven config defaults", func() { var _ = Describe("Hardware-driven config defaults", func() {
const gib = uint64(1) << 30
DescribeTable("GPU.IsNVIDIABlackwell (sm_12x consumer family)", DescribeTable("GPU.IsNVIDIABlackwell (sm_12x consumer family)",
func(cc string, want bool) { func(cc string, want bool) {
Expect(GPU{ComputeCapability: cc}.IsNVIDIABlackwell()).To(Equal(want)) Expect(GPU{ComputeCapability: cc}.IsNVIDIABlackwell()).To(Equal(want))
@@ -37,54 +35,21 @@ var _ = Describe("Hardware-driven config defaults", func() {
}) })
}) })
Describe("PhysicalBatchForContext (per-device VRAM headroom)", func() {
It("raises the batch when the compute buffer fits the device", func() {
// 16 GiB Blackwell with a small context: the extra scratch is tiny.
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.0", VRAM: 16 * gib}, 8192)).
To(Equal(BlackwellPhysicalBatch))
})
It("keeps the default batch when a large context would overflow one device", func() {
// The issue #10485 case: 16 GiB consumer Blackwell, ~200k context.
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.0", VRAM: 16 * gib}, 204800)).
To(Equal(DefaultPhysicalBatch))
})
It("still raises the batch on a large unified-memory device (GB10)", func() {
// GB10 reports system RAM (~119 GiB) as its single device's VRAM.
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.1", VRAM: 119 * gib}, 204800)).
To(Equal(BlackwellPhysicalBatch))
})
It("stays conservative when VRAM is unknown", func() {
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.1"}, 8192)).
To(Equal(DefaultPhysicalBatch))
})
It("never raises the batch on non-Blackwell", func() {
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "9.0", VRAM: 80 * gib}, 8192)).
To(Equal(DefaultPhysicalBatch))
})
})
Describe("ApplyHardwareDefaults", func() { Describe("ApplyHardwareDefaults", func() {
It("raises an unset batch to 2048 on Blackwell with headroom", func() { It("raises an unset batch to 2048 on Blackwell", func() {
cfg := &ModelConfig{} cfg := &ModelConfig{}
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1", VRAM: 119 * gib}) ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"})
Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch)) Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
}) })
It("leaves batch unset when a large context would overflow one device", func() {
// Regression guard for issue #10485: 16 GiB card + ~200k context.
ctx := 204800
cfg := &ModelConfig{LLMConfig: LLMConfig{ContextSize: &ctx}}
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.0", VRAM: 16 * gib})
Expect(cfg.Batch).To(Equal(0))
})
It("leaves batch unset on non-Blackwell", func() { It("leaves batch unset on non-Blackwell", func() {
cfg := &ModelConfig{} cfg := &ModelConfig{}
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "9.0", VRAM: 119 * gib}) ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "9.0"})
Expect(cfg.Batch).To(Equal(0)) Expect(cfg.Batch).To(Equal(0))
}) })
It("never overrides an explicit batch", func() { It("never overrides an explicit batch", func() {
cfg := &ModelConfig{} cfg := &ModelConfig{}
cfg.Batch = 1024 cfg.Batch = 1024
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1", VRAM: 119 * gib}) ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"})
Expect(cfg.Batch).To(Equal(1024)) Expect(cfg.Batch).To(Equal(1024))
}) })
It("no-ops on nil", func() { It("no-ops on nil", func() {
@@ -92,6 +57,8 @@ var _ = Describe("Hardware-driven config defaults", func() {
}) })
}) })
const gib = uint64(1) << 30
DescribeTable("DefaultParallelSlots (by VRAM)", DescribeTable("DefaultParallelSlots (by VRAM)",
func(vramGiB uint64, want int) { func(vramGiB uint64, want int) {
Expect(DefaultParallelSlots(GPU{VRAM: vramGiB * gib})).To(Equal(want)) Expect(DefaultParallelSlots(GPU{VRAM: vramGiB * gib})).To(Equal(want))

View File

@@ -1204,6 +1204,11 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
// This ensures gallery-installed and runtime-loaded models get optimal parameters. // This ensures gallery-installed and runtime-loaded models get optimal parameters.
ApplyInferenceDefaults(cfg, cfg.Name, cfg.Model) ApplyInferenceDefaults(cfg, cfg.Name, cfg.Model)
// Apply hardware-driven defaults (e.g. a larger physical batch on Blackwell).
// Uses the local GPU here; in distributed mode the router re-applies the same
// heuristics for the selected node's GPU before loading. Explicit config wins.
ApplyHardwareDefaults(cfg, localGPU())
// Apply serving-policy defaults (device-independent): cross-request prefix // Apply serving-policy defaults (device-independent): cross-request prefix
// caching. Propagates to distributed nodes via the model options. // caching. Propagates to distributed nodes via the model options.
ApplyServingDefaults(cfg) ApplyServingDefaults(cfg)
@@ -1242,16 +1247,6 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
cfg.ContextSize = &ctx cfg.ContextSize = &ctx
} }
runBackendHooks(cfg, lo.modelPath) runBackendHooks(cfg, lo.modelPath)
// Apply hardware-driven defaults (e.g. a larger physical batch on Blackwell)
// LAST, after the context size is fully resolved (explicit config, LoadOptions,
// then the GGUF guess inside runBackendHooks): the Blackwell batch guard sizes
// the per-device compute buffer against this model's context, so it must see
// the final value, not a pre-guess nil. Uses the local GPU here; in distributed
// mode the router re-applies the same heuristics for the selected node's GPU
// before loading. Explicit config always wins.
ApplyHardwareDefaults(cfg, localGPU())
cfg.syncKnownUsecasesFromString() cfg.syncKnownUsecasesFromString()
} }

View File

@@ -432,7 +432,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL
if pipeline.SoundDetection == "" { if pipeline.SoundDetection == "" {
return nil, nil return nil, nil
} }
cfg, err := loadPipelineSubModel(cl, pipeline.SoundDetection, ml.ModelPath) cfg, err := cl.LoadModelConfigFileByName(pipeline.SoundDetection, ml.ModelPath)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to load sound detection config: %w", err) return nil, fmt.Errorf("failed to load sound detection config: %w", err)
} }
@@ -443,7 +443,7 @@ func loadSoundDetectionConfig(pipeline *config.Pipeline, cl *config.ModelConfigL
} }
func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (Model, *config.ModelConfig, error) { func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (Model, *config.ModelConfig, error) {
cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath) cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("failed to load backend config: %w", err) return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
@@ -453,7 +453,7 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig
return nil, nil, fmt.Errorf("failed to validate config: %w", err) return nil, nil, fmt.Errorf("failed to validate config: %w", err)
} }
cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath) cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("failed to load backend config: %w", err) return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
@@ -542,30 +542,11 @@ func buildRealtimeRoutingContext(a *application.Application, sessionID string) *
} }
} }
// loadPipelineSubModel loads a pipeline sub-model config by name and follows a
// single alias hop, so a pipeline that references an alias (e.g. `llm: default`)
// gets the alias target's full config (Backend, Model, ...) rather than the
// alias stub with an empty Backend. Without this the alias survives unresolved
// into model loading and fails downstream — notably in distributed mode with
// "backend name is empty". Mirrors the top-level alias resolution in
// core/http/middleware/request.go.
func loadPipelineSubModel(cl *config.ModelConfigLoader, name, modelPath string) (*config.ModelConfig, error) {
cfg, err := cl.LoadModelConfigFileByName(name, modelPath)
if err != nil {
return nil, err
}
resolved, _, err := cl.ResolveAlias(cfg)
if err != nil {
return nil, err
}
return resolved, nil
}
// returns and loads either a wrapped model or a model that support audio-to-audio // returns and loads either a wrapped model or a model that support audio-to-audio
func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, evaluator *templates.Evaluator, routing *RealtimeRoutingContext) (Model, error) { func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, evaluator *templates.Evaluator, routing *RealtimeRoutingContext) (Model, error) {
xlog.Debug("Creating new model pipeline model", "pipeline", pipeline) xlog.Debug("Creating new model pipeline model", "pipeline", pipeline)
cfgVAD, err := loadPipelineSubModel(cl, pipeline.VAD, ml.ModelPath) cfgVAD, err := cl.LoadModelConfigFileByName(pipeline.VAD, ml.ModelPath)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to load backend config: %w", err) return nil, fmt.Errorf("failed to load backend config: %w", err)
@@ -576,7 +557,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
} }
// TODO: Do we always need a transcription model? It can be disabled. Note that any-to-any instruction following models don't transcribe as such, so if transcription is required it is a separate process // TODO: Do we always need a transcription model? It can be disabled. Note that any-to-any instruction following models don't transcribe as such, so if transcription is required it is a separate process
cfgSST, err := loadPipelineSubModel(cl, pipeline.Transcription, ml.ModelPath) cfgSST, err := cl.LoadModelConfigFileByName(pipeline.Transcription, ml.ModelPath)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to load backend config: %w", err) return nil, fmt.Errorf("failed to load backend config: %w", err)
@@ -608,7 +589,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
xlog.Debug("Loading a wrapped model") xlog.Debug("Loading a wrapped model")
// Otherwise we want to return a wrapped model, which is a "virtual" model that re-uses other models to perform operations // Otherwise we want to return a wrapped model, which is a "virtual" model that re-uses other models to perform operations
cfgLLM, err := loadPipelineSubModel(cl, pipeline.LLM, ml.ModelPath) cfgLLM, err := cl.LoadModelConfigFileByName(pipeline.LLM, ml.ModelPath)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to load backend config: %w", err) return nil, fmt.Errorf("failed to load backend config: %w", err)
@@ -623,7 +604,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
applyPipelineReasoning(cfgLLM, *pipeline) applyPipelineReasoning(cfgLLM, *pipeline)
applyPipelineThinking(cfgLLM, *pipeline) applyPipelineThinking(cfgLLM, *pipeline)
cfgTTS, err := loadPipelineSubModel(cl, pipeline.TTS, ml.ModelPath) cfgTTS, err := cl.LoadModelConfigFileByName(pipeline.TTS, ml.ModelPath)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to load backend config: %w", err) return nil, fmt.Errorf("failed to load backend config: %w", err)

View File

@@ -1,52 +0,0 @@
package openai
import (
"os"
"path/filepath"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/mudler/LocalAI/core/config"
)
// loadPipelineSubModel must resolve a pipeline sub-model that references an
// alias (e.g. `llm: default`) one hop to the alias target's full config — so
// the effective backend is the target's backend, not the empty backend of the
// alias stub. This mirrors the top-level alias resolution done in
// core/http/middleware/request.go, which the realtime pipeline previously
// skipped (failing in distributed mode with "backend name is empty").
//
// The spec writes two YAML configs into a temporary directory, loads them with
// a fresh ModelConfigLoader, and then checks both the alias path and the
// direct (non-alias) path through loadPipelineSubModel.
var _ = Describe("loadPipelineSubModel", func() {
It("resolves a sub-model alias one hop to the target's config", func() {
// Per-spec scratch directory obtained from Ginkgo's testing handle.
tmpDir := GinkgoT().TempDir()
// A real model config with a concrete backend.
realLLM := `name: real-llm
backend: llama-cpp
parameters:
model: real-llm.gguf
`
Expect(os.WriteFile(filepath.Join(tmpDir, "real-llm.yaml"), []byte(realLLM), 0644)).To(Succeed())
// An alias pointing at the real model.
aliasCfg := `name: default
alias: real-llm
`
Expect(os.WriteFile(filepath.Join(tmpDir, "default.yaml"), []byte(aliasCfg), 0644)).To(Succeed())
// Load both configs from the temp directory into the loader before
// exercising the helper.
cl := config.NewModelConfigLoader(tmpDir)
Expect(cl.LoadModelConfigsFromPath(tmpDir)).To(Succeed())
// Resolving the alias must follow the hop to the target's full config.
resolved, err := loadPipelineSubModel(cl, "default", tmpDir)
Expect(err).NotTo(HaveOccurred())
// The result must be the target itself, not another alias stub, and must
// carry the target's backend.
Expect(resolved.IsAlias()).To(BeFalse())
Expect(resolved.Backend).To(Equal("llama-cpp"))
// A non-alias name must load unchanged.
direct, err := loadPipelineSubModel(cl, "real-llm", tmpDir)
Expect(err).NotTo(HaveOccurred())
Expect(direct.Backend).To(Equal("llama-cpp"))
Expect(direct.Name).To(Equal("real-llm"))
})
})

View File

@@ -86,7 +86,6 @@
"input": { "input": {
"placeholder": "Message...", "placeholder": "Message...",
"attachFile": "Attach file", "attachFile": "Attach file",
"send": "Send message",
"stopGenerating": "Stop generating", "stopGenerating": "Stop generating",
"canvasTitle": "Canvas — extract code blocks and media into a side panel for preview, copy, and download", "canvasTitle": "Canvas — extract code blocks and media into a side panel for preview, copy, and download",
"canvasLabel": "Canvas", "canvasLabel": "Canvas",

View File

@@ -77,21 +77,6 @@
"noModelsTitle": "No Models Available", "noModelsTitle": "No Models Available",
"noModelsBody": "There are no models installed yet. Ask your administrator to set up models so you can start chatting." "noModelsBody": "There are no models installed yet. Ask your administrator to set up models so you can start chatting."
}, },
"starters": {
"title": "Recommended for your hardware",
"tier": {
"cpu": "CPU-only",
"gpu-small": "GPU",
"gpu-mid": "GPU",
"gpu-large": "GPU"
},
"cpuNote": "No GPU detected — these small models stay responsive on CPU.",
"gpuNote": "Picked to fit your available VRAM with room for context.",
"install": "Install",
"installing": "Installing",
"installStarted": "Installing {{model}}…",
"installFailed": "Install failed: {{message}}"
},
"connect": { "connect": {
"title": "One endpoint, every API", "title": "One endpoint, every API",
"subtitle": "LocalAI serves its own full API — image & video generation, depth, object detection, reranking, audio, face & voice recognition, and realtime voice over WebRTC and WebSocket. On top of that, a drop-in compatibility layer lets any app built for OpenAI, Anthropic, Ollama or OpenAI Responses talk to it unchanged.", "subtitle": "LocalAI serves its own full API — image & video generation, depth, object detection, reranking, audio, face & voice recognition, and realtime voice over WebRTC and WebSocket. On top of that, a drop-in compatibility layer lets any app built for OpenAI, Anthropic, Ollama or OpenAI Responses talk to it unchanged.",

View File

@@ -2,16 +2,6 @@
"title": "Install Models", "title": "Install Models",
"subtitle": "Browse and install AI models from the gallery", "subtitle": "Browse and install AI models from the gallery",
"models": "Models", "models": "Models",
"recommended": {
"title": "Recommended for your hardware",
"cpuNote": "No GPU detected - small models that stay responsive on CPU.",
"gpuNote": "Sized to fit your available VRAM with room for context.",
"install": "Install",
"installing": "Installing",
"installStarted": "Installing {{model}}…",
"installFailed": "Install failed: {{message}}",
"dismiss": "Dismiss recommendations"
},
"stats": { "stats": {
"available": "Available", "available": "Available",
"installed": "Installed" "installed": "Installed"

View File

@@ -45,7 +45,7 @@
}, },
"scheduling": { "scheduling": {
"title": "Penjadwalan", "title": "Penjadwalan",
"subtitle": "Aturan penempatan model dan replika di seluruh kluster" "subtitle": "Aturan penempatan model dan replika di seluruh klaster"
}, },
"p2p": { "p2p": {
"title": "Komputasi AI Terdistribusi", "title": "Komputasi AI Terdistribusi",

View File

@@ -72,7 +72,7 @@
"actions": { "actions": {
"copy": "Salin", "copy": "Salin",
"regenerate": "Hasilkan ulang", "regenerate": "Hasilkan ulang",
"jumpToLatest": "Lompat ke terbaru" "jumpToLatest": "Jump to latest"
}, },
"streaming": { "streaming": {
"transferring": "Mentransfer model...", "transferring": "Mentransfer model...",

View File

@@ -1,8 +1,8 @@
{ {
"unsaved": { "unsaved": {
"title": "Buang perubahan yang belum disimpan?", "title": "Discard unsaved changes?",
"message": "Anda memiliki perubahan yang belum disimpan. Perubahan tersebut akan hilang jika Anda meninggalkan halaman ini.", "message": "You have unsaved changes that will be lost if you leave this page.",
"leave": "Tinggalkan Halaman" "leave": "Leave"
}, },
"actions": { "actions": {
"save": "Simpan", "save": "Simpan",

View File

@@ -7,15 +7,15 @@
"resourceGpu": "GPU", "resourceGpu": "GPU",
"resourceRam": "RAM", "resourceRam": "RAM",
"greeting": { "greeting": {
"morning": "Selamat pagi", "morning": "Good morning",
"afternoon": "Selamat siang", "afternoon": "Good afternoon",
"evening": "Selamat malam", "evening": "Good evening",
"night": "Selamat lembur" "night": "Working late"
}, },
"statusLine": { "statusLine": {
"modelsLoaded_one": "{{count}} model dimuat", "modelsLoaded_one": "{{count}} model loaded",
"modelsLoaded_other": "{{count}} model dimuat", "modelsLoaded_other": "{{count}} models loaded",
"noModelsLoaded": "Tidak ada model yang dimuat", "noModelsLoaded": "No models loaded",
"nodes_one": "{{count}} node", "nodes_one": "{{count}} node",
"nodes_other": "{{count}} nodes" "nodes_other": "{{count}} nodes"
}, },
@@ -79,14 +79,14 @@
}, },
"connect": { "connect": {
"title": "Satu endpoint, semua API", "title": "Satu endpoint, semua API",
"subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Selain itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.", "subtitle": "LocalAI menyediakan API miliknya sendiri yang lengkap — pembuatan gambar & video, depth, deteksi objek, reranking, audio, pengenalan wajah & suara, serta suara realtime melalui WebRTC dan WebSocket. Di atas itu, lapisan kompatibilitas drop-in membuat aplikasi apa pun yang dibuat untuk OpenAI, Anthropic, Ollama, atau OpenAI Responses bekerja tanpa perubahan.",
"nativeTitle": "API native", "nativeTitle": "API native",
"compatTitle": "Kompatibilitas drop-in", "compatTitle": "Kompatibilitas drop-in",
"apiReference": "Referensi API lengkap", "apiReference": "Referensi API lengkap",
"copy": "Salin", "copy": "Salin",
"copied": "Disalin", "copied": "Disalin",
"browse": "Jelajahi API", "browse": "Browse the API",
"hide": "Sembunyikan endpoint", "hide": "Hide endpoints",
"dismiss": "Abaikan" "dismiss": "Dismiss"
} }
} }

View File

@@ -5,7 +5,7 @@
"video": "Video", "video": "Video",
"tts": "TTS", "tts": "TTS",
"sound": "Suara", "sound": "Suara",
"transform": "Transformasi" "transform": "Transform"
} }
}, },
"image": { "image": {
@@ -30,7 +30,7 @@
"refImagesAdded_other": "{{count}} gambar ditambahkan" "refImagesAdded_other": "{{count}} gambar ditambahkan"
}, },
"actions": { "actions": {
"view": "Lihat", "view": "View",
"generate": "Hasilkan", "generate": "Hasilkan",
"generating": "Menghasilkan..." "generating": "Menghasilkan..."
}, },

View File

@@ -19,11 +19,11 @@
"operate": "Operasikan" "operate": "Operasikan"
}, },
"operate": { "operate": {
"inference": "Inferensi", "inference": "Inference",
"cluster": "Kluster", "cluster": "Cluster",
"observability": "Observabilitas", "observability": "Observability",
"access": "Akses", "access": "Access",
"system": "Sistem" "system": "System"
}, },
"items": { "items": {
"home": "Beranda", "home": "Beranda",
@@ -64,7 +64,7 @@
"copyright": "© 2023-{{year}} {{author}}" "copyright": "© 2023-{{year}} {{author}}"
}, },
"console": { "console": {
"automation": "Automasi", "automation": "Otomasi",
"training": "Pelatihan" "training": "Pelatihan"
} }
} }

View File

@@ -6363,130 +6363,6 @@ select.input {
justify-content: center; justify-content: center;
} }
/* ──────────────────── Home: hardware-aware starter models ──────────────────── */
/* Outer container of the starter-models widget: vertical margin and inner padding only. */
.home-starters {
margin: var(--spacing-lg) 0;
padding: var(--spacing-lg);
}
/* Header row: children are vertically centred and pushed to opposite ends. */
.home-starters-head {
display: flex;
align-items: center;
justify-content: space-between;
gap: var(--spacing-md);
}
/* Title text inside the header. */
.home-starters-head strong {
font-size: 0.9375rem;
}
/* Tier label: inline flex row so an icon and its text sit side by side, in muted text. */
.home-starters-tier {
display: inline-flex;
align-items: center;
gap: var(--spacing-xs);
font-size: 0.75rem;
color: var(--color-text-muted);
}
/* Explanatory note under the header (small top margin, larger bottom margin). */
.home-starters-sub {
margin: var(--spacing-xs) 0 var(--spacing-md);
font-size: 0.8125rem;
color: var(--color-text-secondary);
}
/* Model list: default list styling removed, entries stacked vertically. */
.home-starters-list {
list-style: none;
margin: 0;
padding: 0;
display: flex;
flex-direction: column;
gap: var(--spacing-xs);
}
/* One list entry laid out as a horizontal flex row. */
.home-starters-item {
display: flex;
align-items: center;
gap: var(--spacing-md);
padding: var(--spacing-xs) 0;
}
/* Model name; break-all lets long names wrap at any character instead of overflowing. */
.home-starters-name {
font-weight: 500;
font-size: 0.875rem;
word-break: break-all;
}
/* Smaller font size for a badge shown inside a row. */
.home-starters-badge {
font-size: 0.625rem;
}
/* Size label: the auto left margin pushes it (and anything after it) to the row's right edge. */
.home-starters-size {
margin-left: auto;
font-size: 0.75rem;
color: var(--color-text-muted);
white-space: nowrap;
}
/* ──────────────────── Models gallery: recommended-for-your-hardware strip ──────────────────── */
/* Outer container of the strip: bottom margin plus inner padding. */
.rec-models {
margin-bottom: var(--spacing-md);
padding: var(--spacing-md) var(--spacing-lg);
}
/* Header row: children top-aligned and pushed to opposite ends. */
.rec-models-head {
display: flex;
align-items: flex-start;
justify-content: space-between;
gap: var(--spacing-md);
}
/* Title group; flex-wrap lets its children drop onto a new line when space runs out. */
.rec-models-title {
display: flex;
align-items: center;
gap: var(--spacing-sm);
flex-wrap: wrap;
}
/* Icon inside the title group uses the primary colour. */
.rec-models-title i {
color: var(--color-primary);
}
/* Secondary-coloured note text. */
.rec-models-note {
font-size: 0.8125rem;
color: var(--color-text-secondary);
}
/* Dismiss control: background and border removed; flex-shrink 0 keeps it from being squeezed. */
.rec-models-dismiss {
background: none;
border: none;
color: var(--color-text-muted);
cursor: pointer;
padding: 4px;
flex-shrink: 0;
}
/* Hover state switches the dismiss control to the primary text colour. */
.rec-models-dismiss:hover {
color: var(--color-text-primary);
}
/* Grid of items: as many columns as fit, each at least 220px wide and sharing the remaining space. */
.rec-models-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));
gap: var(--spacing-sm);
margin-top: var(--spacing-md);
}
/* One grid item: vertical stack with a subtle border, rounded corners and its own background. */
.rec-models-item {
display: flex;
flex-direction: column;
gap: var(--spacing-xs);
padding: var(--spacing-sm) var(--spacing-md);
border: 1px solid var(--color-border-subtle);
border-radius: var(--radius-md);
background: var(--color-bg-primary);
}
/* Item name; break-all lets long names wrap at any character instead of overflowing. */
.rec-models-item-name {
font-weight: 500;
font-size: 0.8125rem;
word-break: break-all;
}
/* Metadata row inside an item, in small muted text. */
.rec-models-item-meta {
display: flex;
gap: var(--spacing-sm);
font-size: 0.75rem;
color: var(--color-text-muted);
}
/* Inline flex group that keeps an icon and its value together with a 4px gap. */
.rec-models-item-fit {
display: inline-flex;
align-items: center;
gap: 4px;
}
/* ──────────────────── Home: drop-in endpoint / API compatibility ──────────────────── */ /* ──────────────────── Home: drop-in endpoint / API compatibility ──────────────────── */
.home-connect { .home-connect {

View File

@@ -1,25 +1,8 @@
import { useEffect, useMemo, useCallback } from 'react' import { useEffect, useMemo } from 'react'
import { useModels } from '../hooks/useModels' import { useModels } from '../hooks/useModels'
import SearchableSelect from './SearchableSelect' import SearchableSelect from './SearchableSelect'
import { useTranslation } from 'react-i18next' import { useTranslation } from 'react-i18next'
// Per-capability memory of the last model the user picked, so that returning
// to a page (Home chat box, Image, TTS, Talk...) can default to that model
// rather than whichever one sorts first. Nothing is stored without a capability
// key — `externalOptions` callers pass no capability and keep the old
// first-item behaviour. Every localStorage call is guarded because
// private-browsing modes throw.
const LAST_MODEL_PREFIX = 'localai_last_model:'

// Returns the stored model name for `capability`, or null when there is no
// capability, nothing is stored, or storage access throws.
function readLastModel(capability) {
  if (capability) {
    try {
      return localStorage.getItem(LAST_MODEL_PREFIX + capability)
    } catch {
      // Storage unavailable: fall through to the "nothing remembered" result.
    }
  }
  return null
}

// Stores `model` as the last pick for `capability`. Does nothing when either
// argument is falsy; storage failures are ignored.
function writeLastModel(capability, model) {
  const persistable = Boolean(capability) && Boolean(model)
  if (!persistable) return
  try {
    localStorage.setItem(LAST_MODEL_PREFIX + capability, model)
  } catch {
    /* ignore */
  }
}
export default function ModelSelector({ export default function ModelSelector({
value, onChange, capability, className = '', value, onChange, capability, className = '',
options: externalOptions, loading: externalLoading, options: externalOptions, loading: externalLoading,
@@ -36,27 +19,16 @@ export default function ModelSelector({
const isLoading = externalOptions ? (externalLoading || false) : hookLoading const isLoading = externalOptions ? (externalLoading || false) : hookLoading
const isDisabled = isLoading || (externalDisabled || false) const isDisabled = isLoading || (externalDisabled || false)
// Persist genuine selections so the next visit can restore them.
const handleChange = useCallback((next) => {
writeLastModel(capability, next)
onChange(next)
}, [capability, onChange])
useEffect(() => { useEffect(() => {
if (modelNames.length > 0 && (!value || !modelNames.includes(value))) { if (modelNames.length > 0 && (!value || !modelNames.includes(value))) {
// Prefer the remembered model when it's still available; otherwise fall onChange(modelNames[0])
// back to the first option. Don't re-persist here — auto-select is not a
// user choice, and writing back the stored value would be a harmless but
// pointless round-trip.
const remembered = readLastModel(capability)
onChange(remembered && modelNames.includes(remembered) ? remembered : modelNames[0])
} }
}, [modelNames, value, onChange, capability]) }, [modelNames, value, onChange])
return ( return (
<SearchableSelect <SearchableSelect
value={value || ''} value={value || ''}
onChange={handleChange} onChange={onChange}
options={modelNames} options={modelNames}
placeholder={isLoading ? t('selector.loading') : (modelNames.length === 0 ? t('selector.noModels') : t('selector.selectModel'))} placeholder={isLoading ? t('selector.loading') : (modelNames.length === 0 ? t('selector.noModels') : t('selector.selectModel'))}
searchPlaceholder={searchPlaceholder || t('selector.searchPlaceholder')} searchPlaceholder={searchPlaceholder || t('selector.searchPlaceholder')}

View File

@@ -1,86 +0,0 @@
import { useState } from 'react'
import { useTranslation } from 'react-i18next'
import { modelsApi } from '../utils/api'
import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels'
const DISMISS_KEY = 'localai_rec_models_dismissed'
// "Recommended for your hardware" strip at the top of the Models gallery. Shares
// the hardware-fit ranking with the empty-state starter widget via
// useRecommendedModels, but styled for the gallery page and dismissible (the
// gallery is a repeat-visit surface, so it shouldn't nag).
export default function RecommendedModels({ addToast }) {
const { t } = useTranslation('models')
const { recommended, tier, loading } = useRecommendedModels({ count: 4 })
const [installing, setInstalling] = useState(() => new Set())
const [dismissed, setDismissed] = useState(() => {
try { return localStorage.getItem(DISMISS_KEY) === '1' } catch { return false }
})
if (loading || dismissed) return null
if (!recommended || recommended.length === 0) return null
const dismiss = () => {
try { localStorage.setItem(DISMISS_KEY, '1') } catch { /* ignore */ }
setDismissed(true)
}
const install = async (name) => {
setInstalling(prev => new Set(prev).add(name))
try {
await modelsApi.install(name)
addToast?.(t('recommended.installStarted', { model: name }), 'success')
} catch (err) {
addToast?.(t('recommended.installFailed', { message: err.message }), 'error')
setInstalling(prev => {
const next = new Set(prev)
next.delete(name)
return next
})
}
}
const isGpu = tier.id !== 'cpu'
return (
<div className="rec-models card">
<div className="rec-models-head">
<div className="rec-models-title">
<i className={`fas ${isGpu ? 'fa-microchip' : 'fa-memory'}`} aria-hidden="true" />
<strong>{t('recommended.title')}</strong>
<span className="rec-models-note">{isGpu ? t('recommended.gpuNote') : t('recommended.cpuNote')}</span>
</div>
<button type="button" className="rec-models-dismiss" onClick={dismiss} aria-label={t('recommended.dismiss')} title={t('recommended.dismiss')}>
<i className="fas fa-times" aria-hidden="true" />
</button>
</div>
<div className="rec-models-grid">
{recommended.map(m => {
const busy = installing.has(m.name)
return (
<div key={m.name} className="rec-models-item">
<div className="rec-models-item-name">{m.name}</div>
<div className="rec-models-item-meta">
{isNvfp4Name(m.name) && <span className="badge badge-info">NVFP4</span>}
{m.sizeDisplay && <span>{m.sizeDisplay}</span>}
{isGpu && m.vramDisplay && (
<span className="rec-models-item-fit"><i className="fas fa-microchip" aria-hidden="true" /> {m.vramDisplay}</span>
)}
</div>
<button
type="button"
className="btn btn-primary btn-sm"
disabled={busy}
onClick={() => install(m.name)}
>
{busy
? (<><i className="fas fa-spinner fa-spin" aria-hidden="true" /> {t('recommended.installing')}</>)
: (<><i className="fas fa-download" aria-hidden="true" /> {t('recommended.install')}</>)}
</button>
</div>
)
})}
</div>
</div>
)
}

View File

@@ -1,129 +0,0 @@
import { useState } from 'react'
import { useTranslation } from 'react-i18next'
import { modelsApi } from '../utils/api'
import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels'
// Static fallback used only when the live gallery / estimates can't be reached
// (offline, trimmed gallery). The hook is the primary, data-driven path; these
// are real gallery names kept as a safety net so onboarding never shows nothing.
// Gemma picks use the QAT (quantization-aware-trained) Q4 builds. NVIDIA boxes
// get NVFP4 + MTP variants at the mid/large tiers (see NVIDIA below).
//
// Both tables are keyed by hardware tier id ('cpu', 'gpu-small', 'gpu-mid',
// 'gpu-large'). Each entry is { name, size }: `name` is the model name passed
// to the install call and `size` is a human-readable display string.
const BASE = {
cpu: [
{ name: 'gemma-4-e2b-it-qat-q4_0', size: '~1.5 GB' },
{ name: 'qwen3.5-4b-claude-4.6-opus-reasoning-distilled', size: '~2.5 GB' },
{ name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' },
{ name: 'lfm2.5-1.2b-instruct', size: '~0.8 GB' },
],
'gpu-small': [
{ name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' },
{ name: 'lfm2.5-8b-a1b', size: '~5 GB' },
{ name: 'qwen3.5-9b', size: '~5.5 GB' },
{ name: 'gemma-4-12b-it-qat-q4_0', size: '~7 GB' },
],
'gpu-mid': [
{ name: 'qwen3.6-27b', size: '~16 GB' },
{ name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' },
{ name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' },
{ name: 'qwen3.5-27b', size: '~16 GB' },
],
'gpu-large': [
{ name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' },
{ name: 'qwen3.6-35b-a3b-claude-4.6-opus-reasoning-distilled', size: '~20 GB' },
{ name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' },
{ name: 'qwen3.5-35b-a3b-apex', size: '~20 GB' },
],
}
// NVIDIA-only overrides: NVFP4 is a Blackwell-optimised 4-bit format paired with
// MTP (multi-token prediction) for speed. Only the mid/large tiers have these.
// Tiers without an entry here have no NVIDIA-specific list.
const NVIDIA = {
'gpu-mid': [
{ name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' },
{ name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' },
{ name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' },
{ name: 'qwen3.6-27b', size: '~16 GB' },
],
'gpu-large': [
{ name: 'qwen3.6-35b-a3b-nvfp4-mtp', size: '~18 GB' },
{ name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' },
{ name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' },
{ name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' },
],
}
// Chooses the static fallback list for a tier: the NVIDIA override when the
// machine has an NVIDIA GPU and an override exists for that tier, otherwise the
// base list for the tier, otherwise the CPU list.
function fallbackFor(tierId, isNvidia) {
  const nvidiaPicks = isNvidia ? NVIDIA[tierId] : undefined
  return nvidiaPicks || BASE[tierId] || BASE.cpu
}
export default function StarterModels({ addToast, onInstallStarted }) {
const { t } = useTranslation('home')
const { recommended, tier, isNvidia, loading } = useRecommendedModels({ count: 4 })
const [installing, setInstalling] = useState(() => new Set())
// While the hardware probe + gallery query are in flight, render nothing
// rather than flashing fallback content that may be replaced a moment later.
if (loading) return null
// Prefer live recommendations; fall back to the static list only when the
// gallery yielded nothing.
const items = (recommended && recommended.length > 0)
? recommended.map(r => ({ name: r.name, size: r.sizeDisplay }))
: fallbackFor(tier.id, isNvidia)
if (items.length === 0) return null
const install = async (name) => {
setInstalling(prev => new Set(prev).add(name))
try {
await modelsApi.install(name)
addToast?.(t('starters.installStarted', { model: name }), 'success')
onInstallStarted?.(name)
} catch (err) {
addToast?.(t('starters.installFailed', { message: err.message }), 'error')
setInstalling(prev => {
const next = new Set(prev)
next.delete(name)
return next
})
}
}
return (
<section className="home-starters card">
<div className="home-starters-head">
<strong>{t('starters.title')}</strong>
<span className="home-starters-tier">
<i className={`fas ${tier.id === 'cpu' ? 'fa-memory' : 'fa-microchip'}`} aria-hidden="true" />
{t(`starters.tier.${tier.id}`)}
</span>
</div>
<p className="home-starters-sub">
{tier.id === 'cpu' ? t('starters.cpuNote') : t('starters.gpuNote')}
</p>
<ul className="home-starters-list">
{items.map(c => {
const busy = installing.has(c.name)
return (
<li key={c.name} className="home-starters-item">
<span className="home-starters-name">{c.name}</span>
{isNvfp4Name(c.name) && <span className="badge badge-info home-starters-badge">NVFP4</span>}
{c.size && <span className="home-starters-size">{c.size}</span>}
<button
type="button"
className="btn btn-primary btn-sm"
disabled={busy}
onClick={() => install(c.name)}
>
{busy
? (<><i className="fas fa-spinner fa-spin" aria-hidden="true" /> {t('starters.installing')}</>)
: (<><i className="fas fa-download" aria-hidden="true" /> {t('starters.install')}</>)}
</button>
</li>
)
})}
</ul>
</section>
)
}

View File

@@ -1,66 +0,0 @@
import { useEffect, useRef, useCallback } from 'react'
// usePolling runs `fn` immediately and then on a fixed interval, with two
// behaviours every hand-rolled setInterval in this app was missing:
//
// 1. Visibility-aware: the timer pauses while the tab is hidden
//    (document.hidden) and fires an immediate catch-up poll when the tab
//    becomes visible again. A backgrounded dashboard no longer hammers the
//    server every few seconds for data nobody is looking at.
// 2. Non-overlapping: if `fn` returns a promise that takes longer than the
//    interval, the next tick is skipped instead of stacking requests.
//
// Parameters:
//   fn          - function to poll; may be sync or return a promise. The latest
//                 `fn` passed on any render is the one that gets called.
//   intervalMs  - delay between polls in milliseconds (default 5000).
//   options.enabled   - `false` stops polling entirely (one-shot or gated polls).
//   options.immediate - `false` skips the poll normally fired when the effect
//                       starts.
//
// Returns `{ refetch }`. `refetch` runs `fn` on demand and is stable across
// renders. It resolves with `fn`'s result, resolves with `undefined` when a
// poll is already in flight, and rejects when `fn` throws or rejects — manual
// callers are expected to handle that themselves.
//
// Background polls (timer ticks, the initial poll and the visibility catch-up)
// have no caller to report to, so a failure there is logged and polling simply
// continues on the next tick rather than surfacing as an unhandled rejection.
export function usePolling(fn, intervalMs = 5000, { enabled = true, immediate = true } = {}) {
  // Always call the most recent `fn` without re-creating `refetch` or
  // restarting the timer when the caller passes a new function identity.
  const fnRef = useRef(fn)
  fnRef.current = fn
  const runningRef = useRef(false)

  const refetch = useCallback(async () => {
    // Guard against overlap: a slow poll shouldn't pile up behind a fast timer.
    if (runningRef.current) return
    runningRef.current = true
    try {
      return await fnRef.current()
    } finally {
      // Cleared on success and failure alike so one bad poll can't wedge the loop.
      runningRef.current = false
    }
  }, [])

  useEffect(() => {
    if (!enabled) return

    let timer = null

    // Fire-and-forget poll. The rejection is handled here because nothing
    // awaits the promise on this path.
    const tick = () => {
      refetch().catch((err) => {
        console.error('usePolling: poll failed', err)
      })
    }

    const start = () => {
      if (timer != null) return
      timer = setInterval(tick, intervalMs)
    }

    const stop = () => {
      if (timer != null) {
        clearInterval(timer)
        timer = null
      }
    }

    const onVisibility = () => {
      if (document.hidden) {
        stop()
      } else {
        // Catch up immediately on return, then resume the cadence.
        tick()
        start()
      }
    }

    if (immediate) tick()
    // A tab that mounts hidden gets no timer until it becomes visible.
    if (!document.hidden) start()
    document.addEventListener('visibilitychange', onVisibility)

    return () => {
      stop()
      document.removeEventListener('visibilitychange', onVisibility)
    }
  }, [enabled, intervalMs, immediate, refetch])

  return { refetch }
}

View File

@@ -1,108 +0,0 @@
import { useState, useEffect } from 'react'
import { modelsApi } from '../utils/api'
import { useResources } from './useResources'
// Data-driven "recommended for your hardware" model picks. The gallery exposes
// no popularity/download signal and the list response carries no size, so we:
// 1. ask the server for chat-capable models in their natural (curated) order,
// 2. estimate size/VRAM for the top candidates (same endpoint the Models page
// uses), and
// 3. rank by hardware fit — smallest on CPU-only boxes, largest-that-fits on
// GPUs (bigger == better quality while still fitting VRAM).
//
// Returns `recommended === null` while loading, `[]` when nothing could be
// resolved (gallery/estimates unavailable) so callers can fall back.
const GB = 1024 * 1024 * 1024
const DEFAULT_CTX = 4096
// True when a model name contains "nvfp4" (case-insensitive). NVFP4 is a
// Blackwell/NVIDIA-specific 4-bit format, so such models are only worth
// suggesting on NVIDIA hardware and are filtered out elsewhere. A missing name
// is treated as the empty string.
export const isNvfp4Name = (name) => {
  const label = name || ''
  return /nvfp4/i.test(label)
}
// True when `resources.gpus` is an array with at least one entry whose
// `vendor` equals "nvidia", compared case-insensitively. Missing resources, a
// non-array `gpus`, and entries without a vendor all count as "no".
export function hasNvidiaGpu(resources) {
  const gpus = resources?.gpus
  if (!Array.isArray(gpus)) return false
  return gpus.some((gpu) => {
    const vendor = gpu?.vendor || ''
    return vendor.toLowerCase() === 'nvidia'
  })
}
// Maps a resources snapshot to a hardware tier `{ id, vram }`.
//
// The machine is treated as CPU-only (`{ id: 'cpu', vram: 0 }`) unless
// `resources.type` is 'gpu' and `resources.aggregate.total_memory` is positive.
// Otherwise the tier is chosen from total memory: below 8 GiB is 'gpu-small',
// below 24 GiB is 'gpu-mid', anything else is 'gpu-large'; `vram` carries the
// total memory value through unchanged.
export function recommendTier(resources) {
  const vram = resources?.aggregate?.total_memory || 0
  if (resources?.type !== 'gpu' || vram <= 0) {
    return { id: 'cpu', vram: 0 }
  }

  let id = 'gpu-large'
  if (vram < 8 * GB) {
    id = 'gpu-small'
  } else if (vram < 24 * GB) {
    id = 'gpu-mid'
  }
  return { id, vram }
}
// Orders estimated candidates by hardware fit and returns at most `count`.
//
//   candidates - objects with `name`, `sizeBytes` and (optionally) `vramBytes`;
//                entries without a size are ignored.
//   tier       - `{ id, vram }` as produced by recommendTier.
//   isNvidia   - whether an NVIDIA GPU is present.
//
// NVFP4 models are dropped unless `isNvidia` is set. On the CPU tier the
// smallest models come first. On GPU tiers only models whose VRAM estimate is
// within 95% of `tier.vram` are considered and the largest come first; if none
// fit, every sized candidate is considered and the smallest come first. On
// NVIDIA, NVFP4 models are placed ahead of the rest in either case.
function rank(candidates, tier, count, isNvidia) {
  const usable = candidates.filter((c) => {
    if (c.sizeBytes == null) return false
    return isNvidia || !isNvfp4Name(c.name)
  })
  const smallestFirst = (a, b) => a.sizeBytes - b.sizeBytes

  // No GPU: smallest models stay responsive on CPU.
  if (tier.id === 'cpu') {
    return usable.slice().sort(smallestFirst).slice(0, count)
  }

  const budget = tier.vram * 0.95
  const fitting = usable.filter((c) => c.vramBytes != null && c.vramBytes <= budget)
  const anyFit = fitting.length > 0
  // Tiny GPU where nothing fits: fall back to the whole sized pool.
  const shortlist = anyFit ? fitting : usable

  const ordered = shortlist.slice().sort((a, b) => {
    if (isNvidia) {
      const aFp4 = isNvfp4Name(a.name)
      const bFp4 = isNvfp4Name(b.name)
      if (aFp4 !== bFp4) return aFp4 ? -1 : 1
    }
    // Largest-that-fits reads as best quality; with no fit, prefer smallest.
    return anyFit ? b.sizeBytes - a.sizeBytes : smallestFirst(a, b)
  })
  return ordered.slice(0, count)
}
// Hook producing hardware-aware model recommendations.
//
// Options:
//   count         - maximum number of recommendations to return (default 4).
//   candidatePool - how many gallery models to request and estimate (default 10).
//
// Returns `{ recommended, tier, isNvidia, error, loading }`. `recommended` is
// null while work is pending (so `loading` is true), an array of ranked picks
// once resolved, and `[]` when the gallery request failed — in which case
// `error` holds the failure message.
export function useRecommendedModels({ count = 4, candidatePool = 10 } = {}) {
  const { resources } = useResources()
  const [recommended, setRecommended] = useState(null)
  const [error, setError] = useState(null)

  const tier = recommendTier(resources)
  const isNvidia = hasNvidiaGpu(resources)
  const resReady = resources !== null

  useEffect(() => {
    // Wait for the first resources snapshot before querying the gallery.
    if (!resReady) return undefined

    let cancelled = false
    setRecommended(null)
    setError(null)

    // Size/VRAM estimate for one gallery entry. A failed estimate yields an
    // entry without a size rather than failing the whole batch.
    const estimateOne = async (model) => {
      const name = model.name || model.id
      try {
        const estimate = await modelsApi.estimate(name, [DEFAULT_CTX])
        const atCtx = estimate?.estimates?.[String(DEFAULT_CTX)]
        return {
          name,
          description: model.description,
          sizeBytes: estimate?.sizeBytes ?? null,
          sizeDisplay: estimate?.sizeDisplay ?? null,
          vramBytes: atCtx?.vramBytes ?? null,
          vramDisplay: atCtx?.vramDisplay ?? null,
        }
      } catch {
        return { name, sizeBytes: null }
      }
    }

    const load = async () => {
      try {
        const page = await modelsApi.list({ tag: 'chat', items: candidatePool, page: 1 })
        // Recommend models the user hasn't installed yet.
        const notInstalled = (page?.models || []).filter((m) => !m.installed)
        const estimated = await Promise.all(notInstalled.map((m) => estimateOne(m)))
        if (cancelled) return
        setRecommended(rank(estimated, tier, count, isNvidia))
      } catch (e) {
        if (cancelled) return
        setError(e.message)
        setRecommended([])
      }
    }
    load()

    // Results that arrive after cleanup are discarded.
    return () => {
      cancelled = true
    }
    // The dependencies are primitives (tier.id, tier.vram, isNvidia), so a
    // resources poll that reports the same hardware doesn't re-run this effect.
  }, [resReady, tier.id, tier.vram, isNvidia, count, candidatePool])

  return { recommended, tier, isNvidia, error, loading: recommended === null }
}

View File

@@ -1,11 +1,11 @@
import { useState, useCallback } from 'react' import { useState, useEffect, useCallback, useRef } from 'react'
import { resourcesApi } from '../utils/api' import { resourcesApi } from '../utils/api'
import { usePolling } from './usePolling'
export function useResources(pollInterval = 5000) { export function useResources(pollInterval = 5000) {
const [resources, setResources] = useState(null) const [resources, setResources] = useState(null)
const [loading, setLoading] = useState(true) const [loading, setLoading] = useState(true)
const [error, setError] = useState(null) const [error, setError] = useState(null)
const intervalRef = useRef(null)
const fetchResources = useCallback(async () => { const fetchResources = useCallback(async () => {
try { try {
@@ -19,10 +19,13 @@ export function useResources(pollInterval = 5000) {
} }
}, []) }, [])
// Visibility-aware polling: pauses while the tab is hidden and catches up on useEffect(() => {
// return (see usePolling). Resource stats are pure dashboard data, so there's fetchResources()
// no reason to keep fetching them for a backgrounded tab. intervalRef.current = setInterval(fetchResources, pollInterval)
const { refetch } = usePolling(fetchResources, pollInterval) return () => {
if (intervalRef.current) clearInterval(intervalRef.current)
}
}, [fetchResources, pollInterval])
return { resources, loading, error, refetch } return { resources, loading, error, refetch: fetchResources }
} }

View File

@@ -765,10 +765,8 @@ export default function AgentChat() {
className="chat-send-btn" className="chat-send-btn"
onClick={handleSend} onClick={handleSend}
disabled={processing || !input.trim()} disabled={processing || !input.trim()}
aria-label="Send message"
title="Send message"
> >
<i className="fas fa-paper-plane" aria-hidden="true" /> <i className="fas fa-paper-plane" />
</button> </button>
</div> </div>
</div> </div>

View File

@@ -1427,10 +1427,8 @@ export default function Chat() {
className="chat-send-btn" className="chat-send-btn"
onClick={handleSend} onClick={handleSend}
disabled={!input.trim() && files.length === 0} disabled={!input.trim() && files.length === 0}
aria-label={t('input.send')}
title={t('input.send')}
> >
<i className="fas fa-paper-plane" aria-hidden="true" /> <i className="fas fa-paper-plane" />
</button> </button>
)} )}
</div> </div>

View File

@@ -10,7 +10,6 @@ import UnifiedMCPDropdown from '../components/UnifiedMCPDropdown'
import ConfirmDialog from '../components/ConfirmDialog' import ConfirmDialog from '../components/ConfirmDialog'
import HomeConnect from '../components/HomeConnect' import HomeConnect from '../components/HomeConnect'
import { useResources } from '../hooks/useResources' import { useResources } from '../hooks/useResources'
import { usePolling } from '../hooks/usePolling'
import { fileToBase64, backendControlApi, systemApi, modelsApi, mcpApi, nodesApi } from '../utils/api' import { fileToBase64, backendControlApi, systemApi, modelsApi, mcpApi, nodesApi } from '../utils/api'
import { API_CONFIG } from '../utils/config' import { API_CONFIG } from '../utils/config'
import { greetingKey } from '../utils/greeting' import { greetingKey } from '../utils/greeting'
@@ -18,7 +17,6 @@ import StatusPill from '../components/StatusPill'
import Skeleton from '../components/Skeleton' import Skeleton from '../components/Skeleton'
import SectionHeading from '../components/SectionHeading' import SectionHeading from '../components/SectionHeading'
import EmptyState from '../components/EmptyState' import EmptyState from '../components/EmptyState'
import StarterModels from '../components/StarterModels'
import { staggerStyle } from '../hooks/useStagger' import { staggerStyle } from '../hooks/useStagger'
export default function Home() { export default function Home() {
@@ -70,36 +68,40 @@ export default function Home() {
.catch(() => {}) .catch(() => {})
}, []) }, [])
// Poll cluster node data in distributed mode. Visibility-aware + gated on // Poll cluster node data in distributed mode
// distributedMode so a non-distributed or backgrounded tab makes no calls. useEffect(() => {
const fetchCluster = useCallback(async () => { if (!distributedMode) return
try { const fetchCluster = async () => {
const data = await nodesApi.list() try {
const nodes = Array.isArray(data) ? data : [] const data = await nodesApi.list()
const backendNodes = nodes.filter(n => !n.node_type || n.node_type === 'backend') const nodes = Array.isArray(data) ? data : []
const totalVRAM = backendNodes.reduce((sum, n) => sum + (n.total_vram || 0), 0) const backendNodes = nodes.filter(n => !n.node_type || n.node_type === 'backend')
const usedVRAM = backendNodes.reduce((sum, n) => { const totalVRAM = backendNodes.reduce((sum, n) => sum + (n.total_vram || 0), 0)
if (n.total_vram && n.available_vram != null) return sum + (n.total_vram - n.available_vram) const usedVRAM = backendNodes.reduce((sum, n) => {
return sum if (n.total_vram && n.available_vram != null) return sum + (n.total_vram - n.available_vram)
}, 0) return sum
const totalRAM = backendNodes.reduce((sum, n) => sum + (n.total_ram || 0), 0) }, 0)
const usedRAM = backendNodes.reduce((sum, n) => { const totalRAM = backendNodes.reduce((sum, n) => sum + (n.total_ram || 0), 0)
if (n.total_ram && n.available_ram != null) return sum + (n.total_ram - n.available_ram) const usedRAM = backendNodes.reduce((sum, n) => {
return sum if (n.total_ram && n.available_ram != null) return sum + (n.total_ram - n.available_ram)
}, 0) return sum
const isGPU = totalVRAM > 0 }, 0)
const healthyCount = backendNodes.filter(n => n.status === 'healthy').length const isGPU = totalVRAM > 0
const totalCount = backendNodes.length const healthyCount = backendNodes.filter(n => n.status === 'healthy').length
setClusterData({ const totalCount = backendNodes.length
totalMem: isGPU ? totalVRAM : totalRAM, setClusterData({
usedMem: isGPU ? usedVRAM : usedRAM, totalMem: isGPU ? totalVRAM : totalRAM,
isGPU, usedMem: isGPU ? usedVRAM : usedRAM,
healthyCount, isGPU,
totalCount, healthyCount,
}) totalCount,
} catch { setClusterData(null) } })
}, []) } catch { setClusterData(null) }
usePolling(fetchCluster, 5000, { enabled: distributedMode }) }
fetchCluster()
const interval = setInterval(fetchCluster, 5000)
return () => clearInterval(interval)
}, [distributedMode])
// Fetch configured models (to know if any exist) and loaded models (currently running) // Fetch configured models (to know if any exist) and loaded models (currently running)
const fetchSystemInfo = useCallback(async () => { const fetchSystemInfo = useCallback(async () => {
@@ -121,7 +123,11 @@ export default function Home() {
} }
}, []) }, [])
usePolling(fetchSystemInfo, 5000) useEffect(() => {
fetchSystemInfo()
const interval = setInterval(fetchSystemInfo, 5000)
return () => clearInterval(interval)
}, [fetchSystemInfo])
// Check MCP availability when selected model changes // Check MCP availability when selected model changes
useEffect(() => { useEffect(() => {
@@ -517,8 +523,6 @@ export default function Home() {
</div> </div>
</div> </div>
<StarterModels addToast={addToast} onInstallStarted={fetchSystemInfo} />
<div className="home-wizard-actions"> <div className="home-wizard-actions">
<button className="btn btn-primary" onClick={() => navigate('/app/models')}> <button className="btn btn-primary" onClick={() => navigate('/app/models')}>
<i className="fas fa-store" /> {t('wizard.browseGallery')} <i className="fas fa-store" /> {t('wizard.browseGallery')}

View File

@@ -13,7 +13,6 @@ import ConfirmDialog from '../components/ConfirmDialog'
import GalleryLoader from '../components/GalleryLoader' import GalleryLoader from '../components/GalleryLoader'
import Toggle from '../components/Toggle' import Toggle from '../components/Toggle'
import ResponsiveTable from '../components/ResponsiveTable' import ResponsiveTable from '../components/ResponsiveTable'
import RecommendedModels from '../components/RecommendedModels'
import React from 'react' import React from 'react'
@@ -302,8 +301,6 @@ export default function Models() {
} }
/> />
<RecommendedModels addToast={addToast} />
{/* Search */} {/* Search */}
<div className="search-bar" style={{ marginBottom: 'var(--spacing-md)' }}> <div className="search-bar" style={{ marginBottom: 'var(--spacing-md)' }}>
<i className="fas fa-search search-icon" /> <i className="fas fa-search search-icon" />

View File

@@ -24,37 +24,7 @@ function formatNumber(n) {
return String(n) return String(n)
} }
// Opt-in token pricing. LocalAI is self-hosted and has no inherent monetary function StatCard({ icon, label, value, muted }) {
// cost, but multi-user deployments use estimated cost for chargeback/budgeting.
// Prices are admin-supplied $ per 1M tokens, stored locally (per-browser), and
// the whole cost surface stays hidden until a non-zero price is set.
// localStorage key under which the admin-supplied token prices are kept.
const TOKEN_PRICING_KEY = 'localai_token_pricing'

/**
 * Read the stored token pricing from localStorage.
 *
 * Each price is coerced with Number(); anything that coerces to a falsy
 * value (missing, NaN, 0) becomes 0. Any failure while reading or parsing
 * (storage unavailable, malformed JSON, a stored `null`) yields zero prices.
 *
 * @returns {{prompt: number, completion: number}} prices per 1M tokens
 */
function loadPricing() {
  const toPrice = (raw) => {
    const parsed = Number(raw)
    return parsed ? parsed : 0
  }
  try {
    const stored = localStorage.getItem(TOKEN_PRICING_KEY)
    // Destructuring a parsed `null` throws, which lands in the catch below.
    const { prompt, completion } = JSON.parse(stored || '{}')
    return { prompt: toPrice(prompt), completion: toPrice(completion) }
  } catch {
    return { prompt: 0, completion: 0 }
  }
}
/**
 * Persist the given pricing object to localStorage as JSON.
 *
 * Best-effort: any error raised while serializing or writing is swallowed.
 *
 * @param {object} p pricing object to store
 */
function savePricing(p) {
  try {
    const serialized = JSON.stringify(p)
    localStorage.setItem(TOKEN_PRICING_KEY, serialized)
  } catch {
    /* ignore */
  }
}
/**
 * Report whether at least one of the prompt/completion prices is above zero.
 *
 * @param {{prompt?: number, completion?: number}|null|undefined} p pricing
 * @returns {boolean} true when either price is greater than 0
 */
function pricingEnabled(p) {
  const isPositive = (price) => (price || 0) > 0
  return isPositive(p?.prompt) || isPositive(p?.completion)
}
/**
 * Estimate the cost of a usage row from per-million-token prices.
 *
 * @param {{prompt_tokens: number, completion_tokens: number}} row token counts
 * @param {{prompt?: number, completion?: number}} p prices per 1M tokens;
 *   a missing or falsy price counts as 0
 * @returns {number} prompt cost plus completion cost
 */
function costOf(row, p) {
  const TOKENS_PER_UNIT = 1_000_000
  const promptCost = (row.prompt_tokens / TOKENS_PER_UNIT) * (p.prompt || 0)
  const completionCost = (row.completion_tokens / TOKENS_PER_UNIT) * (p.completion || 0)
  return promptCost + completionCost
}
/**
 * Format an estimated cost as a dollar string.
 *
 * Falsy input (0, NaN, undefined) renders as '$0.00'; values below one cent
 * render as '<$0.01'; everything else is shown with two decimals.
 *
 * @param {number} n cost in dollars
 * @returns {string} display string
 */
function formatCost(n) {
  if (!n) {
    return '$0.00'
  }
  return n < 0.01 ? '<$0.01' : `$${n.toFixed(2)}`
}
function StatCard({ icon, label, value, muted, text }) {
return ( return (
<div className="card" style={{ padding: 'var(--spacing-sm) var(--spacing-md)', flex: '1 1 0', minWidth: 120, opacity: muted ? 0.7 : 1 }}> <div className="card" style={{ padding: 'var(--spacing-sm) var(--spacing-md)', flex: '1 1 0', minWidth: 120, opacity: muted ? 0.7 : 1 }}>
<div style={{ display: 'flex', alignItems: 'center', gap: 6, marginBottom: 2 }}> <div style={{ display: 'flex', alignItems: 'center', gap: 6, marginBottom: 2 }}>
@@ -62,7 +32,7 @@ function StatCard({ icon, label, value, muted, text }) {
<span style={{ fontSize: '0.6875rem', color: 'var(--color-text-muted)', fontWeight: 500, textTransform: 'uppercase', letterSpacing: '0.03em' }}>{label}</span> <span style={{ fontSize: '0.6875rem', color: 'var(--color-text-muted)', fontWeight: 500, textTransform: 'uppercase', letterSpacing: '0.03em' }}>{label}</span>
</div> </div>
<div style={{ fontSize: '1.375rem', fontWeight: 700, fontFamily: 'var(--font-mono)', color: muted ? 'var(--color-text-secondary)' : 'var(--color-text-primary)' }}> <div style={{ fontSize: '1.375rem', fontWeight: 700, fontFamily: 'var(--font-mono)', color: muted ? 'var(--color-text-secondary)' : 'var(--color-text-primary)' }}>
{text != null ? text : `${muted ? '~' : ''}${formatNumber(value)}`} {muted ? '~' : ''}{formatNumber(value)}
</div> </div>
</div> </div>
) )
@@ -672,10 +642,6 @@ export default function Usage() {
const [activeTab, setActiveTab] = useState('models') const [activeTab, setActiveTab] = useState('models')
const [quotas, setQuotas] = useState([]) const [quotas, setQuotas] = useState([])
const [selectedUserId, setSelectedUserId] = useState(null) const [selectedUserId, setSelectedUserId] = useState(null)
const [pricing, setPricingState] = useState(loadPricing)
const [showPricing, setShowPricing] = useState(false)
const setPricing = (p) => { setPricingState(p); savePricing(p) }
const costEnabled = pricingEnabled(pricing)
const fetchUsage = useCallback(async () => { const fetchUsage = useCallback(async () => {
setLoading(true) setLoading(true)
@@ -777,50 +743,11 @@ export default function Usage() {
<i className="fas fa-key" style={{ fontSize: '0.7rem' }} /> {t('usage.sources.tab')} <i className="fas fa-key" style={{ fontSize: '0.7rem' }} /> {t('usage.sources.tab')}
</button> </button>
<div style={{ flex: 1 }} /> <div style={{ flex: 1 }} />
<button
className={`btn btn-sm ${costEnabled ? 'btn-primary' : 'btn-secondary'}`}
onClick={() => setShowPricing(v => !v)}
style={{ gap: 4 }}
title="Set token pricing to estimate cost"
>
<i className="fas fa-dollar-sign" /> {costEnabled ? 'Pricing' : 'Set pricing'}
</button>
<button className="btn btn-secondary btn-sm" onClick={fetchUsage} disabled={loading} style={{ gap: 4 }}> <button className="btn btn-secondary btn-sm" onClick={fetchUsage} disabled={loading} style={{ gap: 4 }}>
<i className={`fas fa-rotate${loading ? ' fa-spin' : ''}`} /> Refresh <i className={`fas fa-rotate${loading ? ' fa-spin' : ''}`} /> Refresh
</button> </button>
</div> </div>
{showPricing && (
<div className="card" style={{ display: 'flex', alignItems: 'flex-end', gap: 'var(--spacing-md)', flexWrap: 'wrap', padding: 'var(--spacing-md)', marginBottom: 'var(--spacing-md)' }}>
<div style={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
<label style={{ fontSize: '0.6875rem', color: 'var(--color-text-muted)', textTransform: 'uppercase', letterSpacing: '0.03em' }}>Prompt $/1M tokens</label>
<input
className="input" type="number" min="0" step="0.01" style={{ width: 140 }}
value={pricing.prompt || ''}
placeholder="0.00"
onChange={e => setPricing({ ...pricing, prompt: Number(e.target.value) || 0 })}
/>
</div>
<div style={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
<label style={{ fontSize: '0.6875rem', color: 'var(--color-text-muted)', textTransform: 'uppercase', letterSpacing: '0.03em' }}>Completion $/1M tokens</label>
<input
className="input" type="number" min="0" step="0.01" style={{ width: 140 }}
value={pricing.completion || ''}
placeholder="0.00"
onChange={e => setPricing({ ...pricing, completion: Number(e.target.value) || 0 })}
/>
</div>
{costEnabled && (
<button className="btn btn-secondary btn-sm" onClick={() => setPricing({ prompt: 0, completion: 0 })} style={{ gap: 4 }}>
<i className="fas fa-times" /> Clear
</button>
)}
<span style={{ fontSize: '0.75rem', color: 'var(--color-text-muted)', flex: '1 1 200px' }}>
Estimated cost only. Prices are stored in this browser and applied to recorded token counts.
</span>
</div>
)}
{loading ? ( {loading ? (
<div style={{ display: 'flex', justifyContent: 'center', padding: 'var(--spacing-xl)' }}> <div style={{ display: 'flex', justifyContent: 'center', padding: 'var(--spacing-xl)' }}>
<LoadingSpinner size="lg" /> <LoadingSpinner size="lg" />
@@ -833,9 +760,6 @@ export default function Usage() {
<StatCard icon="fas fa-arrow-up" label="Prompt" value={displayTotals.prompt_tokens} /> <StatCard icon="fas fa-arrow-up" label="Prompt" value={displayTotals.prompt_tokens} />
<StatCard icon="fas fa-arrow-down" label="Completion" value={displayTotals.completion_tokens} /> <StatCard icon="fas fa-arrow-down" label="Completion" value={displayTotals.completion_tokens} />
<StatCard icon="fas fa-coins" label="Total" value={displayTotals.total_tokens} /> <StatCard icon="fas fa-coins" label="Total" value={displayTotals.total_tokens} />
{costEnabled && (
<StatCard icon="fas fa-dollar-sign" label="Est. Cost" text={formatCost(costOf(displayTotals, pricing))} />
)}
</div> </div>
{/* Predictions */} {/* Predictions */}
@@ -865,7 +789,6 @@ export default function Usage() {
<th style={{ width: 110 }}>Prompt</th> <th style={{ width: 110 }}>Prompt</th>
<th style={{ width: 110 }}>Completion</th> <th style={{ width: 110 }}>Completion</th>
<th style={{ width: 110 }}>Total</th> <th style={{ width: 110 }}>Total</th>
{costEnabled && <th style={{ width: 100 }}>Est. Cost</th>}
<th style={{ width: 140 }}></th> <th style={{ width: 140 }}></th>
</tr> </tr>
</thead> </thead>
@@ -877,7 +800,6 @@ export default function Usage() {
<td style={monoCell}>{formatNumber(row.prompt_tokens)}</td> <td style={monoCell}>{formatNumber(row.prompt_tokens)}</td>
<td style={monoCell}>{formatNumber(row.completion_tokens)}</td> <td style={monoCell}>{formatNumber(row.completion_tokens)}</td>
<td style={{ ...monoCell, fontWeight: 600 }}>{formatNumber(row.total_tokens)}</td> <td style={{ ...monoCell, fontWeight: 600 }}>{formatNumber(row.total_tokens)}</td>
{costEnabled && <td style={monoCell}>{formatCost(costOf(row, pricing))}</td>}
<td><UsageBar value={row.total_tokens} max={maxTokens} /></td> <td><UsageBar value={row.total_tokens} max={maxTokens} /></td>
</tr> </tr>
))} ))}
@@ -905,7 +827,6 @@ export default function Usage() {
<th style={{ width: 110 }}>Prompt</th> <th style={{ width: 110 }}>Prompt</th>
<th style={{ width: 110 }}>Completion</th> <th style={{ width: 110 }}>Completion</th>
<th style={{ width: 110 }}>Total</th> <th style={{ width: 110 }}>Total</th>
{costEnabled && <th style={{ width: 100 }}>Est. Cost</th>}
<th style={{ width: 110 }}>Proj. Total</th> <th style={{ width: 110 }}>Proj. Total</th>
<th style={{ width: 140 }}></th> <th style={{ width: 140 }}></th>
</tr> </tr>
@@ -928,7 +849,6 @@ export default function Usage() {
<td style={monoCell}>{formatNumber(row.prompt_tokens)}</td> <td style={monoCell}>{formatNumber(row.prompt_tokens)}</td>
<td style={monoCell}>{formatNumber(row.completion_tokens)}</td> <td style={monoCell}>{formatNumber(row.completion_tokens)}</td>
<td style={{ ...monoCell, fontWeight: 600 }}>{formatNumber(row.total_tokens)}</td> <td style={{ ...monoCell, fontWeight: 600 }}>{formatNumber(row.total_tokens)}</td>
{costEnabled && <td style={monoCell}>{formatCost(costOf(row, pricing))}</td>}
<td style={{ ...monoCell, color: 'var(--color-text-muted)', fontStyle: 'italic' }}> <td style={{ ...monoCell, color: 'var(--color-text-muted)', fontStyle: 'italic' }}>
{up?.predictions ? `~${formatNumber(up.predictions.projectedTotals.total_tokens)}` : '-'} {up?.predictions ? `~${formatNumber(up.predictions.projectedTotals.total_tokens)}` : '-'}
</td> </td>
@@ -936,7 +856,7 @@ export default function Usage() {
</tr> </tr>
{isExpanded && up && ( {isExpanded && up && (
<tr> <tr>
<td colSpan={costEnabled ? 9 : 8} style={{ padding: 0, background: 'var(--color-bg-secondary)' }}> <td colSpan={8} style={{ padding: 0, background: 'var(--color-bg-secondary)' }}>
<div style={{ padding: 'var(--spacing-md)' }}> <div style={{ padding: 'var(--spacing-md)' }}>
{up.predictions && ( {up.predictions && (
<div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(100px, 1fr))', gap: 'var(--spacing-xs)', marginBottom: 'var(--spacing-sm)' }}> <div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(100px, 1fr))', gap: 'var(--spacing-xs)', marginBottom: 'var(--spacing-sm)' }}>

View File

@@ -156,10 +156,7 @@ func applyNodeHardwareDefaults(opts *pb.ModelOptions, node *BackendNode) {
VRAM: node.TotalVRAM, VRAM: node.TotalVRAM,
} }
if config.IsManagedPhysicalBatch(int(opts.NBatch)) { if config.IsManagedPhysicalBatch(int(opts.NBatch)) {
// Gate the raised batch on the selected node's per-device VRAM at this opts.NBatch = int32(config.PhysicalBatch(gpu))
// model's context, so a large context can't overflow the node's compute
// buffer (issue #10485). node.TotalVRAM is the node's reported ceiling.
opts.NBatch = int32(config.PhysicalBatchForContext(gpu, int(opts.ContextSize)))
} }
// Default concurrent serving for the selected node (the frontend that built // Default concurrent serving for the selected node (the frontend that built
// the options may have no GPU). Only adds when no parallel option is set. // the options may have no GPU). Only adds when no parallel option is set.

View File

@@ -8,19 +8,12 @@ import (
) )
var _ = Describe("applyNodeHardwareDefaults", func() { var _ = Describe("applyNodeHardwareDefaults", func() {
It("raises a managed default batch on a Blackwell node with headroom", func() { It("raises a managed default batch on a Blackwell node", func() {
opts := &pb.ModelOptions{NBatch: config.DefaultPhysicalBatch, ContextSize: 8192} opts := &pb.ModelOptions{NBatch: config.DefaultPhysicalBatch}
applyNodeHardwareDefaults(opts, &BackendNode{GPUComputeCapability: "12.1", TotalVRAM: 119 << 30}) applyNodeHardwareDefaults(opts, &BackendNode{GPUComputeCapability: "12.1"})
Expect(opts.NBatch).To(BeEquivalentTo(config.BlackwellPhysicalBatch)) Expect(opts.NBatch).To(BeEquivalentTo(config.BlackwellPhysicalBatch))
}) })
It("keeps the default batch when a large context would overflow the node", func() {
// Regression guard for issue #10485 on the distributed path.
opts := &pb.ModelOptions{NBatch: config.DefaultPhysicalBatch, ContextSize: 204800}
applyNodeHardwareDefaults(opts, &BackendNode{GPUComputeCapability: "12.0", TotalVRAM: 16 << 30})
Expect(opts.NBatch).To(BeEquivalentTo(config.DefaultPhysicalBatch))
})
It("resets a Blackwell guess on a non-Blackwell node", func() { It("resets a Blackwell guess on a non-Blackwell node", func() {
// frontend (Blackwell) guessed high, but the selected node is not Blackwell // frontend (Blackwell) guessed high, but the selected node is not Blackwell
opts := &pb.ModelOptions{NBatch: config.BlackwellPhysicalBatch} opts := &pb.ModelOptions{NBatch: config.BlackwellPhysicalBatch}

View File

@@ -1,3 +1,3 @@
{ {
"version": "v4.5.0" "version": "v4.4.3"
} }

View File

@@ -3,7 +3,24 @@
url: "github:mudler/LocalAI/gallery/virtual.yaml@master" url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls: urls:
- https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF - https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF
description: "Try LFM • Docs • LEAP • Discord\n\n# LFM2.5-1.2B-Instruct\n\nLFM2.5 is a new family of hybrid models designed for **on-device deployment**. It builds on the LFM2 architecture with extended pre-training and reinforcement learning.\n\n - **Best-in-class performance**: A 1.2B model rivaling much larger models, bringing high-quality AI to your pocket.\n - **Fast edge inference**: 239 tok/s decode on AMD CPU, 82 tok/s on mobile NPU. Runs under 1GB of memory with day-one support for llama.cpp, MLX, and vLLM.\n - **Scaled training**: Extended pre-training from 10T to 28T tokens and large-scale multi-stage reinforcement learning.\n\nFind more information about LFM2.5 in our blog post.\n\n## \U0001F5D2 Model Details\n\nLFM2.5-1.2B-Instruct is a general-purpose text-only model with the following features:\n\n...\n" description: |
Try LFM • Docs • LEAP • Discord
# LFM2.5-1.2B-Instruct
LFM2.5 is a new family of hybrid models designed for **on-device deployment**. It builds on the LFM2 architecture with extended pre-training and reinforcement learning.
- **Best-in-class performance**: A 1.2B model rivaling much larger models, bringing high-quality AI to your pocket.
- **Fast edge inference**: 239 tok/s decode on AMD CPU, 82 tok/s on mobile NPU. Runs under 1GB of memory with day-one support for llama.cpp, MLX, and vLLM.
- **Scaled training**: Extended pre-training from 10T to 28T tokens and large-scale multi-stage reinforcement learning.
Find more information about LFM2.5 in our blog post.
## 🗒️ Model Details
LFM2.5-1.2B-Instruct is a general-purpose text-only model with the following features:
...
license: "other" license: "other"
tags: tags:
- llm - llm
@@ -825,8 +842,8 @@
use_tokenizer_template: true use_tokenizer_template: true
files: files:
- filename: llama-cpp/models/Qwopus3.6-27B-Coder-MTP-GGUF/Qwopus3.6-27B-Coder-MTP-Q4_K_M.gguf - filename: llama-cpp/models/Qwopus3.6-27B-Coder-MTP-GGUF/Qwopus3.6-27B-Coder-MTP-Q4_K_M.gguf
sha256: b2898667ed7b2388f0ab7691393833ae777f247492bbe62fdb4b2bd3e3cf3f79
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-MTP-GGUF/resolve/main/Qwopus3.6-27B-Coder-MTP-Q4_K_M.gguf uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-MTP-GGUF/resolve/main/Qwopus3.6-27B-Coder-MTP-Q4_K_M.gguf
sha256: b2b9180093496da2e00439e3fa23227c591355901bfa579bc6897bbc01b755ef
- filename: llama-cpp/mmproj/Qwopus3.6-27B-Coder-MTP-GGUF/mmproj-F32.gguf - filename: llama-cpp/mmproj/Qwopus3.6-27B-Coder-MTP-GGUF/mmproj-F32.gguf
sha256: 32f7ea0600c07272547da401d460f8abbd980f3a57b69d6df87be0e2505e0b9c sha256: 32f7ea0600c07272547da401d460f8abbd980f3a57b69d6df87be0e2505e0b9c
uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-MTP-GGUF/resolve/main/mmproj-F32.gguf uri: https://huggingface.co/Jackrong/Qwopus3.6-27B-Coder-MTP-GGUF/resolve/main/mmproj-F32.gguf

View File

@@ -129,61 +129,6 @@ func TotalAvailableVRAM() (uint64, error) {
return 0, nil return 0, nil
} }
// MinPerGPUVRAM returns the total VRAM of the SMALLEST GPU on the host (in
// bytes), or 0 when no per-device VRAM is known. Unlike TotalAvailableVRAM
// (which sums across devices) this reports a single device's ceiling, which is
// the right figure for decisions about what must fit on one card: the compute
// buffer (sized by n_ubatch) and the parallel-slot tier. Summing a multi-GPU
// host's VRAM over-provisions those into a per-device OOM (issue #10485).
//
// Unified-memory devices (GB10, Apple) report system RAM as their single
// device's VRAM, so they are unaffected.
func MinPerGPUVRAM() (uint64, error) {
// Prefer per-device binary detection (nvidia-smi/rocm-smi report true
// per-card VRAM); ghw's per-card memory can reflect NUMA node RAM on some
// hosts, which is why TotalAvailableVRAM treats it as a sum.
if infos := GetGPUMemoryUsage(); len(infos) > 0 {
if v := minNonZeroVRAM(infos); v > 0 {
return v, nil
}
}
// Fallback: ghw per-card memory, taking the minimum non-zero card.
if gpus, err := GPUs(); err == nil {
var min uint64
for _, gpu := range gpus {
if gpu == nil || gpu.Node == nil || gpu.Node.Memory == nil {
continue
}
if b := gpu.Node.Memory.TotalUsableBytes; b > 0 {
if u := uint64(b); min == 0 || u < min {
min = u
}
}
}
if min > 0 {
return min, nil
}
}
return 0, nil
}
// minNonZeroVRAM scans infos and returns the lowest TotalVRAM that is greater
// than zero. It returns 0 when infos is empty or every entry reports zero.
func minNonZeroVRAM(infos []GPUMemoryInfo) uint64 {
	var smallest uint64
	for i := range infos {
		vram := infos[i].TotalVRAM
		switch {
		case vram == 0:
			// Device reports no VRAM; it must not win the minimum.
		case smallest == 0, vram < smallest:
			smallest = vram
		}
	}
	return smallest
}
func HasGPU(vendor string) bool { func HasGPU(vendor string) bool {
gpus, err := GPUs() gpus, err := GPUs()
if err != nil { if err != nil {

View File

@@ -1,37 +0,0 @@
package xsysinfo
import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for minNonZeroVRAM: the smallest non-zero device wins, zero-VRAM
// devices are skipped, and an empty or all-zero list yields 0.
var _ = Describe("minNonZeroVRAM", func() {
	const gib = uint64(1) << 30

	// gpusOf builds one GPUMemoryInfo per argument, each sized in GiB.
	gpusOf := func(sizesGiB ...uint64) []GPUMemoryInfo {
		devices := make([]GPUMemoryInfo, 0, len(sizesGiB))
		for _, size := range sizesGiB {
			devices = append(devices, GPUMemoryInfo{TotalVRAM: size * gib})
		}
		return devices
	}

	It("returns the smallest device on a multi-GPU host", func() {
		// A mixed pair of cards: the smaller one is the per-card allocation
		// ceiling.
		Expect(minNonZeroVRAM(gpusOf(16, 12))).To(Equal(12 * gib))
	})

	It("ignores devices that report zero VRAM", func() {
		Expect(minNonZeroVRAM(gpusOf(0, 24))).To(Equal(24 * gib))
	})

	It("returns the single device's VRAM on a one-GPU host", func() {
		Expect(minNonZeroVRAM(gpusOf(16))).To(Equal(16 * gib))
	})

	It("returns 0 when no device reports VRAM", func() {
		Expect(minNonZeroVRAM(gpusOf(0))).To(BeZero())
		Expect(minNonZeroVRAM(nil)).To(BeZero())
	})
})