diff --git a/.github/backend-matrix.yml b/.github/backend-matrix.yml
index ea489dead..5ad6d9e16 100644
--- a/.github/backend-matrix.yml
+++ b/.github/backend-matrix.yml
@@ -4981,6 +4981,9 @@ includeDarwin:
- backend: "vllm"
tag-suffix: "-metal-darwin-arm64-vllm"
build-type: "mps"
+ - backend: "trl"
+ tag-suffix: "-metal-darwin-arm64-trl"
+ build-type: "mps"
- backend: "liquid-audio"
tag-suffix: "-metal-darwin-arm64-liquid-audio"
build-type: "mps"
diff --git a/backend/cpp/ik-llama-cpp/Makefile b/backend/cpp/ik-llama-cpp/Makefile
index 0fbcf0bdb..860606253 100644
--- a/backend/cpp/ik-llama-cpp/Makefile
+++ b/backend/cpp/ik-llama-cpp/Makefile
@@ -1,5 +1,5 @@
-IK_LLAMA_VERSION?=7ccf1d209588962b96eacca325b37e9b3e8faf5e
+IK_LLAMA_VERSION?=d5507e33ae7ee2b7b41475f08044d3bde3b839ee
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
CMAKE_ARGS?=
diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile
index 24f1f215d..f00fad518 100644
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@
-LLAMA_VERSION?=be4a6a63eb2b848e19c277bdcf2bd399e8af76d9
+LLAMA_VERSION?=8be759e6f70d629638a7eb70db3824cbdcea370b
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=
diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp
index c2e7f22e4..6907b9122 100644
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -37,6 +37,7 @@
#include "backend.pb.h"
#include "backend.grpc.pb.h"
#include "common.h"
+#include "arg.h"
#include "chat-auto-parser.h"
#include
#include
@@ -592,6 +593,10 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
params.checkpoint_min_step = 256;
#endif
+ // Raw upstream llama-server flags collected from any option entry that
+ // starts with '-'. Applied once after the loop via common_params_parse.
+ std::vector extra_argv;
+
// decode options. Options are in form optname:optvale, or if booleans only optname.
for (int i = 0; i < request->options_size(); i++) {
std::string opt = request->options(i);
@@ -1080,6 +1085,31 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
} catch (...) {}
}
+ // --- main model MoE on CPU (upstream --cpu-moe / --n-cpu-moe) ---
+ } else if (!strcmp(optname, "cpu_moe")) {
+ // Bool-style flag: keep all MoE expert weights on CPU.
+ const bool enable = (optval == NULL) ||
+ optval_str == "true" || optval_str == "1" || optval_str == "yes" ||
+ optval_str == "on" || optval_str == "enabled";
+ if (enable) {
+ params.tensor_buft_overrides.push_back(llm_ffn_exps_cpu_override());
+ }
+ } else if (!strcmp(optname, "n_cpu_moe")) {
+ if (optval != NULL) {
+ try {
+ int n = std::stoi(optval_str);
+ if (n < 0) n = 0;
+ // Keep override-name storage alive for the lifetime of the
+ // params struct (mirrors upstream arg.cpp's function-local static).
+ static std::list buft_overrides_main;
+ for (int i = 0; i < n; ++i) {
+ buft_overrides_main.push_back(llm_ffn_exps_block_regex(i));
+ params.tensor_buft_overrides.push_back(
+ {buft_overrides_main.back().c_str(), ggml_backend_cpu_buffer_type()});
+ }
+ } catch (...) {}
+ }
+
// --- draft model tensor buffer overrides (upstream --spec-draft-override-tensor) ---
} else if (!strcmp(optname, "draft_override_tensor") || !strcmp(optname, "spec_draft_override_tensor")) {
// Format: =,=,...
@@ -1111,6 +1141,30 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
else { cur.push_back(c); }
}
if (!cur.empty()) flush(cur);
+
+ // --- generic passthrough: any entry starting with '-' is a raw
+ // upstream llama-server flag, forwarded verbatim to the parser. ---
+ } else if (optname[0] == '-') {
+ std::string flag = optname;
+ // These flags make upstream's parser exit() (printing usage /
+ // completion), which would kill the backend process. Skip them.
+ if (flag == "-h" || flag == "--help" || flag == "--usage" ||
+ flag == "--version" || flag == "--license" ||
+ flag == "--list-devices" || flag == "-cl" ||
+ flag == "--cache-list" ||
+ flag.rfind("--completion", 0) == 0) {
+ fprintf(stderr,
+ "[llama-cpp] ignoring passthrough flag that would exit: %s\n",
+ flag.c_str());
+ } else {
+ extra_argv.push_back(flag);
+ // Preserve the whole value after the first ':' so embedded
+ // colons (e.g. host:port) survive strtok's truncation of optval.
+ auto colon = opt.find(':');
+ if (colon != std::string::npos) {
+ extra_argv.push_back(opt.substr(colon + 1));
+ }
+ }
}
}
@@ -1146,27 +1200,6 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
}
}
- if (!params.kv_overrides.empty()) {
- params.kv_overrides.emplace_back();
- params.kv_overrides.back().key[0] = 0;
- }
-
- // tensor_buft_overrides sentinel termination (mirrors upstream common/arg.cpp).
- // Real entries are pushed during option parsing; here we pad/terminate so the
- // model loader sees back().pattern == nullptr (GGML_ASSERT at common.cpp:1543)
- // and so llama_params_fit has the placeholder slots it requires.
- {
- const size_t ntbo = llama_max_tensor_buft_overrides();
- while (params.tensor_buft_overrides.size() < ntbo) {
- params.tensor_buft_overrides.push_back({nullptr, nullptr});
- }
- }
- // Terminate the draft tensor_buft_overrides list with a sentinel, mirroring
- // the main-model handling above.
- if (!params.speculative.draft.tensor_buft_overrides.empty()) {
- params.speculative.draft.tensor_buft_overrides.push_back({nullptr, nullptr});
- }
-
// TODO: Add yarn
if (!request->tensorsplit().empty()) {
@@ -1259,6 +1292,69 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
params.sampling.grammar_triggers.push_back(std::move(trigger));
}
}
+
+ // Apply any raw upstream flags last so an explicit passthrough flag wins
+ // over the LocalAI-resolved field it maps to (e.g. --ctx-size beats
+ // context_size). This is the same parser llama-server itself uses.
+ if (!extra_argv.empty()) {
+ // common_params_parser_init resets a few fields for the SERVER example
+ // (n_parallel -> -1, use_color). Snapshot n_parallel so an unrelated
+ // passthrough flag can't silently clobber LocalAI's resolved value.
+ const int saved_n_parallel = params.n_parallel;
+
+ std::vector argv;
+ std::string prog = "llama-server";
+ argv.push_back(prog.data());
+ for (auto & a : extra_argv) {
+ argv.push_back(a.data());
+ }
+
+ // ctx_arg.params is a reference, so this overlays the given flags onto
+ // `params` in place. Returns false on a recoverable parse error (and
+ // self-restores params); may exit() on a hard error, exactly as
+ // passing the same bad flag to llama-server would.
+ if (!common_params_parse((int)argv.size(), argv.data(), params,
+ LLAMA_EXAMPLE_SERVER)) {
+ fprintf(stderr,
+ "[llama-cpp] failed to parse passthrough options; ignoring them\n");
+ }
+
+ // Restore n_parallel unless a passthrough flag explicitly set it
+ // (parser_init's reset sentinel for SERVER is -1).
+ if (params.n_parallel == -1) {
+ params.n_parallel = saved_n_parallel;
+ }
+ }
+
+ // Terminate/pad the override vectors only after BOTH the named-option loop
+ // and the generic passthrough (common_params_parse above) have pushed their
+ // real entries, so back() is the null sentinel the model loader asserts on.
+ // Running these before the passthrough let a passthrough flag (--cpu-moe,
+ // --override-tensor, --override-kv, ...) append a real entry after the
+ // sentinel: a GGML_ASSERT crash for tensor_buft_overrides, a silent drop for
+ // kv_overrides. Double-termination is harmless (the while is a no-op if the
+ // passthrough parse already padded; an extra trailing null is ignored).
+
+ if (!params.kv_overrides.empty()) {
+ params.kv_overrides.emplace_back();
+ params.kv_overrides.back().key[0] = 0;
+ }
+
+ // tensor_buft_overrides sentinel termination (mirrors upstream common/arg.cpp).
+ // Real entries are pushed during option parsing; here we pad/terminate so the
+ // model loader sees back().pattern == nullptr (GGML_ASSERT at common.cpp:1543)
+ // and so llama_params_fit has the placeholder slots it requires.
+ {
+ const size_t ntbo = llama_max_tensor_buft_overrides();
+ while (params.tensor_buft_overrides.size() < ntbo) {
+ params.tensor_buft_overrides.push_back({nullptr, nullptr});
+ }
+ }
+ // Terminate the draft tensor_buft_overrides list with a sentinel, mirroring
+ // the main-model handling above.
+ if (!params.speculative.draft.tensor_buft_overrides.empty()) {
+ params.speculative.draft.tensor_buft_overrides.push_back({nullptr, nullptr});
+ }
}
diff --git a/backend/go/acestep-cpp/Makefile b/backend/go/acestep-cpp/Makefile
index 0b1929b94..3332ce1b6 100644
--- a/backend/go/acestep-cpp/Makefile
+++ b/backend/go/acestep-cpp/Makefile
@@ -117,7 +117,8 @@ libgoacestepcpp-custom: CMakeLists.txt cpp/goacestepcpp.cpp cpp/goacestepcpp.h
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) --target goacestepcpp && \
cd .. && \
- mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET)
+ (mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET) 2>/dev/null || \
+ mv build-$(SO_TARGET)/libgoacestepcpp.dylib ./$(SO_TARGET))
test: acestep-cpp
@echo "Running acestep-cpp tests..."
diff --git a/backend/go/acestep-cpp/main.go b/backend/go/acestep-cpp/main.go
index c65afb335..e4c1378b8 100644
--- a/backend/go/acestep-cpp/main.go
+++ b/backend/go/acestep-cpp/main.go
@@ -4,6 +4,7 @@ package main
import (
"flag"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -22,7 +23,11 @@ func main() {
// Get library name from environment variable, default to fallback
libName := os.Getenv("ACESTEP_LIBRARY")
if libName == "" {
- libName = "./libgoacestepcpp-fallback.so"
+ if runtime.GOOS == "darwin" {
+ libName = "./libgoacestepcpp-fallback.dylib"
+ } else {
+ libName = "./libgoacestepcpp-fallback.so"
+ }
}
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/acestep-cpp/package.sh b/backend/go/acestep-cpp/package.sh
index d922c5b86..5fecf3455 100755
--- a/backend/go/acestep-cpp/package.sh
+++ b/backend/go/acestep-cpp/package.sh
@@ -13,6 +13,7 @@ mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/acestep-cpp $CURDIR/package/
cp -fv $CURDIR/libgoacestepcpp-*.so $CURDIR/package/
+cp -fv $CURDIR/libgoacestepcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries
diff --git a/backend/go/acestep-cpp/run.sh b/backend/go/acestep-cpp/run.sh
index d901e2c85..bcdfbc09e 100755
--- a/backend/go/acestep-cpp/run.sh
+++ b/backend/go/acestep-cpp/run.sh
@@ -12,9 +12,19 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1
fi
-LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+ # macOS: single library variant (Metal or Accelerate). On Apple, CMake
+ # emits a .dylib for a SHARED target but a .so for a MODULE target, and
+ # goacestepcpp is built as a MODULE; probe for .dylib first and fall
+ # back to .so so either build type loads.
+ LIBRARY="$CURDIR/libgoacestepcpp-fallback.dylib"
+ if [ ! -e "$LIBRARY" ]; then
+ LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
+ fi
+ export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+ LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
-if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
if [ -e $CURDIR/libgoacestepcpp-avx.so ]; then
@@ -36,9 +46,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgoacestepcpp-avx512.so"
fi
fi
+
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export ACESTEP_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it
diff --git a/backend/go/ced/Makefile b/backend/go/ced/Makefile
index 632c0e255..2b15990ec 100644
--- a/backend/go/ced/Makefile
+++ b/backend/go/ced/Makefile
@@ -57,6 +57,7 @@ libced.so: sources/ced.cpp
cmake -B sources/ced.cpp/build-shared -S sources/ced.cpp $(CMAKE_ARGS)
cmake --build sources/ced.cpp/build-shared --config Release -j$(JOBS)
cp -fv sources/ced.cpp/build-shared/libced.so* ./ 2>/dev/null || true
+ cp -fv sources/ced.cpp/build-shared/libced.dylib ./ 2>/dev/null || true
cp -fv sources/ced.cpp/include/ced_capi.h ./
ced-grpc: libced.so main.go goced.go
diff --git a/backend/go/ced/main.go b/backend/go/ced/main.go
index ea8aa8549..b6c93a9f9 100644
--- a/backend/go/ced/main.go
+++ b/backend/go/ced/main.go
@@ -12,6 +12,7 @@ import (
"flag"
"fmt"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -27,7 +28,11 @@ type libFunc struct {
func main() {
libName := os.Getenv("CED_LIBRARY")
if libName == "" {
- libName = "libced.so"
+ if runtime.GOOS == "darwin" {
+ libName = "libced.dylib"
+ } else {
+ libName = "libced.so"
+ }
}
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
if err != nil {
diff --git a/backend/go/ced/package.sh b/backend/go/ced/package.sh
index bde0adad6..ff20d727f 100755
--- a/backend/go/ced/package.sh
+++ b/backend/go/ced/package.sh
@@ -15,10 +15,12 @@ mkdir -p "$CURDIR/package/lib"
cp -avf "$CURDIR/ced-grpc" "$CURDIR/package/"
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
-cp -avf "$CURDIR"/libced.so* "$CURDIR/package/lib/" 2>/dev/null || {
- echo "ERROR: libced.so not found in $CURDIR, run 'make' first" >&2
+cp -avf "$CURDIR"/libced.so* "$CURDIR/package/lib/" 2>/dev/null || true
+cp -avf "$CURDIR"/libced.dylib "$CURDIR/package/lib/" 2>/dev/null || true
+if ! ls "$CURDIR"/package/lib/libced.* >/dev/null 2>&1; then
+ echo "ERROR: libced shared library not found in $CURDIR, run 'make' first" >&2
exit 1
-}
+fi
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
echo "Detected x86_64 architecture, copying x86_64 libraries..."
diff --git a/backend/go/ced/run.sh b/backend/go/ced/run.sh
index bce6fec8e..1f95f748f 100755
--- a/backend/go/ced/run.sh
+++ b/backend/go/ced/run.sh
@@ -3,7 +3,12 @@ set -e
CURDIR=$(dirname "$(realpath "$0")")
-export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
+if [ "$(uname)" = "Darwin" ]; then
+ export DYLD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${DYLD_LIBRARY_PATH:-}"
+ export CED_LIBRARY="$CURDIR/lib/libced.dylib"
+else
+ export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
+fi
# If a self-contained ld.so was packaged, route through it so the packaged
# libc / libstdc++ are used instead of the host's (matches the sibling backends).
diff --git a/backend/go/crispasr/Makefile b/backend/go/crispasr/Makefile
index ba55b485e..1b32240e3 100644
--- a/backend/go/crispasr/Makefile
+++ b/backend/go/crispasr/Makefile
@@ -75,7 +75,8 @@ UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libgocrispasr-avx.so libgocrispasr-avx2.so libgocrispasr-avx512.so libgocrispasr-fallback.so
else
- VARIANT_TARGETS = libgocrispasr-fallback.so
+ # On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
+ VARIANT_TARGETS = libgocrispasr-fallback.dylib
endif
crispasr: main.go gocrispasr.go $(VARIANT_TARGETS)
@@ -87,7 +88,7 @@ package: crispasr
build: package
clean: purge
- rm -rf libgocrispasr*.so package sources/CrispASR crispasr
+ rm -rf libgocrispasr*.so libgocrispasr*.dylib package sources/CrispASR crispasr
purge:
rm -rf build*
@@ -118,13 +119,21 @@ libgocrispasr-fallback.so: sources/CrispASR
SO_TARGET=libgocrispasr-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom
rm -rfv build*
+# Build fallback variant as a dylib (Darwin)
+libgocrispasr-fallback.dylib: sources/CrispASR
+ $(MAKE) purge
+ $(info ${GREEN}I crispasr build info:fallback (dylib)${RESET})
+ SO_TARGET=libgocrispasr-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom
+ rm -rfv build*
+
libgocrispasr-custom: CMakeLists.txt cpp/crispasr_shim.cpp cpp/crispasr_shim.h
mkdir -p build-$(SO_TARGET) && \
cd build-$(SO_TARGET) && \
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \
cd .. && \
- mv build-$(SO_TARGET)/libgocrispasr.so ./$(SO_TARGET)
+ (mv build-$(SO_TARGET)/libgocrispasr.so ./$(SO_TARGET) 2>/dev/null || \
+ mv build-$(SO_TARGET)/libgocrispasr.dylib ./$(SO_TARGET))
test: crispasr
CGO_ENABLED=0 $(GOCMD) test -v ./...
diff --git a/backend/go/crispasr/main.go b/backend/go/crispasr/main.go
index 9f3ef14d0..a1f132cc5 100644
--- a/backend/go/crispasr/main.go
+++ b/backend/go/crispasr/main.go
@@ -4,6 +4,7 @@ package main
import (
"flag"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
func main() {
libName := os.Getenv("CRISPASR_LIBRARY")
if libName == "" {
- libName = "./libgocrispasr-fallback.so"
+ if runtime.GOOS == "darwin" {
+ libName = "./libgocrispasr-fallback.dylib"
+ } else {
+ libName = "./libgocrispasr-fallback.so"
+ }
}
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/crispasr/package.sh b/backend/go/crispasr/package.sh
index baee12944..9b89dad1b 100755
--- a/backend/go/crispasr/package.sh
+++ b/backend/go/crispasr/package.sh
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/crispasr $CURDIR/package/
-cp -fv $CURDIR/libgocrispasr-*.so $CURDIR/package/
+cp -fv $CURDIR/libgocrispasr-*.so $CURDIR/package/ 2>/dev/null || true
+cp -fv $CURDIR/libgocrispasr-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries
diff --git a/backend/go/crispasr/run.sh b/backend/go/crispasr/run.sh
index ccb264833..6d3c4b216 100755
--- a/backend/go/crispasr/run.sh
+++ b/backend/go/crispasr/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1
fi
-LIBRARY="$CURDIR/libgocrispasr-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+ # macOS: single dylib variant (Metal or Accelerate)
+ LIBRARY="$CURDIR/libgocrispasr-fallback.dylib"
+ export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+ LIBRARY="$CURDIR/libgocrispasr-fallback.so"
-if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
if [ -e $CURDIR/libgocrispasr-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgocrispasr-avx512.so"
fi
fi
+
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export CRISPASR_LIBRARY=$LIBRARY
# Point piper's espeak-ng phonemizer at the bundled voice data. The variable
diff --git a/backend/go/depth-anything-cpp/Makefile b/backend/go/depth-anything-cpp/Makefile
index f1a0b9f97..efe99a626 100644
--- a/backend/go/depth-anything-cpp/Makefile
+++ b/backend/go/depth-anything-cpp/Makefile
@@ -77,7 +77,7 @@ ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libdepthanythingcpp-avx.so libdepthanythingcpp-avx2.so libdepthanythingcpp-avx512.so libdepthanythingcpp-fallback.so
else
# On non-Linux (e.g., Darwin), build only fallback variant
- VARIANT_TARGETS = libdepthanythingcpp-fallback.so
+ VARIANT_TARGETS = libdepthanythingcpp-fallback.dylib
endif
depth-anything-cpp: main.go godepthanythingcpp.go $(VARIANT_TARGETS)
@@ -89,7 +89,7 @@ package: depth-anything-cpp
build: package
clean: purge
- rm -rf libdepthanythingcpp*.so depth-anything-cpp package sources
+ rm -rf libdepthanythingcpp*.so libdepthanythingcpp*.dylib depth-anything-cpp package sources
purge:
rm -rf build*
@@ -116,11 +116,19 @@ libdepthanythingcpp-avx512.so: sources/depth-anything.cpp
endif
# Build fallback variant (all platforms)
+ifeq ($(UNAME_S),Darwin)
+libdepthanythingcpp-fallback.dylib: sources/depth-anything.cpp
+ rm -rfv build-$@
+ $(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
+ SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
+ rm -rfv build-$@
+else
libdepthanythingcpp-fallback.so: sources/depth-anything.cpp
rm -rfv build-$@
$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
rm -rfv build-$@
+endif
libdepthanythingcpp-custom: CMakeLists.txt
mkdir -p build-$(SO_TARGET) && \
@@ -128,7 +136,8 @@ libdepthanythingcpp-custom: CMakeLists.txt
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \
cd .. && \
- mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET)
+ (mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET) 2>/dev/null || \
+ mv build-$(SO_TARGET)/libdepthanything.dylib ./$(SO_TARGET))
all: depth-anything-cpp package
diff --git a/backend/go/depth-anything-cpp/main.go b/backend/go/depth-anything-cpp/main.go
index 4c4546797..cfad88b23 100644
--- a/backend/go/depth-anything-cpp/main.go
+++ b/backend/go/depth-anything-cpp/main.go
@@ -9,6 +9,7 @@ package main
import (
"flag"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -27,7 +28,11 @@ func main() {
// Get library name from environment variable, default to fallback
libName := os.Getenv("DEPTHANYTHING_LIBRARY")
if libName == "" {
- libName = "./libdepthanythingcpp-fallback.so"
+ if runtime.GOOS == "darwin" {
+ libName = "./libdepthanythingcpp-fallback.dylib"
+ } else {
+ libName = "./libdepthanythingcpp-fallback.so"
+ }
}
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/depth-anything-cpp/package.sh b/backend/go/depth-anything-cpp/package.sh
index 4690555ea..5bbd5559b 100755
--- a/backend/go/depth-anything-cpp/package.sh
+++ b/backend/go/depth-anything-cpp/package.sh
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
# Create lib directory
mkdir -p $CURDIR/package/lib
-cp -avf $CURDIR/libdepthanythingcpp-*.so $CURDIR/package/
+cp -fv $CURDIR/libdepthanythingcpp-*.so $CURDIR/package/ 2>/dev/null || true
+cp -fv $CURDIR/libdepthanythingcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -avf $CURDIR/depth-anything-cpp $CURDIR/package/
cp -fv $CURDIR/run.sh $CURDIR/package/
diff --git a/backend/go/depth-anything-cpp/run.sh b/backend/go/depth-anything-cpp/run.sh
index 984aa5849..cbff6b0b5 100755
--- a/backend/go/depth-anything-cpp/run.sh
+++ b/backend/go/depth-anything-cpp/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1
fi
-LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+ # macOS: single dylib variant (Metal or Accelerate)
+ LIBRARY="$CURDIR/libdepthanythingcpp-fallback.dylib"
+ export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+ LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"
-if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
if [ -e $CURDIR/libdepthanythingcpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libdepthanythingcpp-avx512.so"
fi
fi
+
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export DEPTHANYTHING_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it
diff --git a/backend/go/localvqe/Makefile b/backend/go/localvqe/Makefile
index 7b66e9371..049da0cdd 100644
--- a/backend/go/localvqe/Makefile
+++ b/backend/go/localvqe/Makefile
@@ -67,8 +67,9 @@ $(LIB_SENTINEL): sources/LocalVQE
# that the loader picks at runtime. We must build every target — the
# default `--target localvqe_shared` drops these. CMAKE_LIBRARY_OUTPUT_DIRECTORY
# routes all of them into build/bin; copy them out next to the binary.
- cp -P build/bin/liblocalvqe.so* . 2>/dev/null || cp -P build/liblocalvqe.so* .
+ cp -P build/bin/liblocalvqe.so* . 2>/dev/null || cp -P build/bin/liblocalvqe.dylib . 2>/dev/null || cp -P build/liblocalvqe.so* . 2>/dev/null || cp -P build/liblocalvqe.dylib .
cp -P build/bin/libggml*.so* . 2>/dev/null || true
+ cp -P build/bin/libggml*.dylib . 2>/dev/null || true
touch $(LIB_SENTINEL)
liblocalvqe.so: $(LIB_SENTINEL)
diff --git a/backend/go/localvqe/main.go b/backend/go/localvqe/main.go
index 56ed2de2f..cbaa2a134 100644
--- a/backend/go/localvqe/main.go
+++ b/backend/go/localvqe/main.go
@@ -4,6 +4,7 @@ package main
import (
"flag"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
func main() {
libName := os.Getenv("LOCALVQE_LIBRARY")
if libName == "" {
- libName = "./liblocalvqe.so"
+ if runtime.GOOS == "darwin" {
+ libName = "./liblocalvqe.dylib"
+ } else {
+ libName = "./liblocalvqe.so"
+ }
}
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/localvqe/package.sh b/backend/go/localvqe/package.sh
index ca8dfd3ab..9f9f2533d 100755
--- a/backend/go/localvqe/package.sh
+++ b/backend/go/localvqe/package.sh
@@ -15,7 +15,9 @@ cp -avf $CURDIR/localvqe $CURDIR/package/
# liblocalvqe.so* (with SOVERSION symlinks) and the libggml-*.so runtime
# variants — LocalVQE picks the matching CPU variant at load time.
cp -P $CURDIR/liblocalvqe.so* $CURDIR/package/ 2>/dev/null || true
+cp -P $CURDIR/liblocalvqe.dylib $CURDIR/package/ 2>/dev/null || true
cp -P $CURDIR/libggml*.so* $CURDIR/package/ 2>/dev/null || true
+cp -P $CURDIR/libggml*.dylib $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries
diff --git a/backend/go/localvqe/run.sh b/backend/go/localvqe/run.sh
index 0f3192e31..d14d427c4 100755
--- a/backend/go/localvqe/run.sh
+++ b/backend/go/localvqe/run.sh
@@ -10,8 +10,19 @@ CURDIR=$(dirname "$(realpath $0)")
# exec'ing the binary.
cd "$CURDIR"
-export LD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$LD_LIBRARY_PATH
-export LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
+if [ "$(uname)" = "Darwin" ]; then
+ # macOS: LocalVQE is built as a SHARED library, so dyld needs the .dylib +
+ # DYLD_LIBRARY_PATH. Prefer .dylib and fall back to .so just in case.
+ export DYLD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$DYLD_LIBRARY_PATH
+ LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.dylib
+ if [ ! -e "$LOCALVQE_LIBRARY" ]; then
+ LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
+ fi
+ export LOCALVQE_LIBRARY
+else
+ export LD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$LD_LIBRARY_PATH
+ export LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
+fi
if [ -f $CURDIR/lib/ld.so ]; then
echo "Using lib/ld.so"
diff --git a/backend/go/locate-anything-cpp/Makefile b/backend/go/locate-anything-cpp/Makefile
index 91dbc41c2..ba12c7195 100644
--- a/backend/go/locate-anything-cpp/Makefile
+++ b/backend/go/locate-anything-cpp/Makefile
@@ -70,7 +70,7 @@ ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = liblocateanythingcpp-avx.so liblocateanythingcpp-avx2.so liblocateanythingcpp-avx512.so liblocateanythingcpp-fallback.so
else
# On non-Linux (e.g., Darwin), build only fallback variant
- VARIANT_TARGETS = liblocateanythingcpp-fallback.so
+ VARIANT_TARGETS = liblocateanythingcpp-fallback.dylib
endif
locate-anything-cpp: main.go golocateanythingcpp.go $(VARIANT_TARGETS)
@@ -82,7 +82,7 @@ package: locate-anything-cpp
build: package
clean: purge
- rm -rf liblocateanythingcpp*.so locate-anything-cpp package sources
+ rm -rf liblocateanythingcpp*.so liblocateanythingcpp*.dylib locate-anything-cpp package sources
purge:
rm -rf build*
@@ -109,11 +109,19 @@ liblocateanythingcpp-avx512.so: sources/locate-anything.cpp
endif
# Build fallback variant (all platforms)
+ifeq ($(UNAME_S),Darwin)
+liblocateanythingcpp-fallback.dylib: sources/locate-anything.cpp
+ rm -rfv build-$@
+ $(info ${GREEN}I locate-anything-cpp build info:fallback${RESET})
+ SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom
+ rm -rfv build-$@
+else
liblocateanythingcpp-fallback.so: sources/locate-anything.cpp
rm -rfv build-$@
$(info ${GREEN}I locate-anything-cpp build info:fallback${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom
rm -rfv build-$@
+endif
liblocateanythingcpp-custom: CMakeLists.txt
mkdir -p build-$(SO_TARGET) && \
@@ -121,7 +129,8 @@ liblocateanythingcpp-custom: CMakeLists.txt
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \
cd .. && \
- mv build-$(SO_TARGET)/liblocateanythingcpp.so ./$(SO_TARGET)
+ (mv build-$(SO_TARGET)/liblocateanythingcpp.so ./$(SO_TARGET) 2>/dev/null || \
+ mv build-$(SO_TARGET)/liblocateanythingcpp.dylib ./$(SO_TARGET))
all: locate-anything-cpp package
diff --git a/backend/go/locate-anything-cpp/main.go b/backend/go/locate-anything-cpp/main.go
index 91ccaf38e..77e53bb95 100644
--- a/backend/go/locate-anything-cpp/main.go
+++ b/backend/go/locate-anything-cpp/main.go
@@ -9,6 +9,7 @@ package main
import (
"flag"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -27,7 +28,11 @@ func main() {
// Get library name from environment variable, default to fallback
libName := os.Getenv("LOCATEANYTHING_LIBRARY")
if libName == "" {
- libName = "./liblocateanythingcpp-fallback.so"
+ if runtime.GOOS == "darwin" {
+ libName = "./liblocateanythingcpp-fallback.dylib"
+ } else {
+ libName = "./liblocateanythingcpp-fallback.so"
+ }
}
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/locate-anything-cpp/package.sh b/backend/go/locate-anything-cpp/package.sh
index 3b1f13428..1e6cbee80 100755
--- a/backend/go/locate-anything-cpp/package.sh
+++ b/backend/go/locate-anything-cpp/package.sh
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
# Create lib directory
mkdir -p $CURDIR/package/lib
-cp -avf $CURDIR/liblocateanythingcpp-*.so $CURDIR/package/
+cp -fv $CURDIR/liblocateanythingcpp-*.so $CURDIR/package/ 2>/dev/null || true
+cp -fv $CURDIR/liblocateanythingcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -avf $CURDIR/locate-anything-cpp $CURDIR/package/
cp -fv $CURDIR/run.sh $CURDIR/package/
diff --git a/backend/go/locate-anything-cpp/run.sh b/backend/go/locate-anything-cpp/run.sh
index cefbff629..4eebb3c63 100755
--- a/backend/go/locate-anything-cpp/run.sh
+++ b/backend/go/locate-anything-cpp/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1
fi
-LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+ # macOS: single dylib variant (Metal or Accelerate)
+ LIBRARY="$CURDIR/liblocateanythingcpp-fallback.dylib"
+ export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+ LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so"
-if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
if [ -e $CURDIR/liblocateanythingcpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/liblocateanythingcpp-avx512.so"
fi
fi
+
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export LOCATEANYTHING_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it
diff --git a/backend/go/omnivoice-cpp/Makefile b/backend/go/omnivoice-cpp/Makefile
index b42610aac..36b447b13 100644
--- a/backend/go/omnivoice-cpp/Makefile
+++ b/backend/go/omnivoice-cpp/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# omnivoice.cpp version
OMNIVOICE_REPO?=https://github.com/ServeurpersoCom/omnivoice.cpp
-OMNIVOICE_VERSION?=96d30169afd5e6bb3fd6a0e9be0eb505bfe81fcd
+OMNIVOICE_VERSION?=0f37401bebe9b20c0160a888e592108fc1d17607
SO_TARGET?=libgomnivoicecpp.so
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
@@ -65,7 +65,8 @@ UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libgomnivoicecpp-avx.so libgomnivoicecpp-avx2.so libgomnivoicecpp-avx512.so libgomnivoicecpp-fallback.so
else
- VARIANT_TARGETS = libgomnivoicecpp-fallback.so
+ # On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
+ VARIANT_TARGETS = libgomnivoicecpp-fallback.dylib
endif
omnivoice-cpp: main.go gomnivoicecpp.go $(VARIANT_TARGETS)
@@ -77,7 +78,7 @@ package: omnivoice-cpp
build: package
clean: purge
- rm -rf libgomnivoicecpp*.so package sources/omnivoice.cpp omnivoice-cpp
+ rm -rf libgomnivoicecpp*.so libgomnivoicecpp*.dylib package sources/omnivoice.cpp omnivoice-cpp
purge:
rm -rf build*
@@ -106,13 +107,20 @@ libgomnivoicecpp-fallback.so: sources/omnivoice.cpp
SO_TARGET=libgomnivoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom
rm -rf build-libgomnivoicecpp-fallback.so
+# Build fallback variant as a dylib (Darwin)
+libgomnivoicecpp-fallback.dylib: sources/omnivoice.cpp
+ $(info ${GREEN}I omnivoice-cpp build info:fallback (dylib)${RESET})
+ SO_TARGET=libgomnivoicecpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom
+ rm -rf build-libgomnivoicecpp-fallback.dylib
+
libgomnivoicecpp-custom: CMakeLists.txt cpp/gomnivoicecpp.cpp cpp/gomnivoicecpp.h
mkdir -p build-$(SO_TARGET) && \
cd build-$(SO_TARGET) && \
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) --target gomnivoicecpp && \
cd .. && \
- mv build-$(SO_TARGET)/libgomnivoicecpp.so ./$(SO_TARGET)
+ (mv build-$(SO_TARGET)/libgomnivoicecpp.so ./$(SO_TARGET) 2>/dev/null || \
+ mv build-$(SO_TARGET)/libgomnivoicecpp.dylib ./$(SO_TARGET))
test: omnivoice-cpp
@echo "Running omnivoice-cpp tests..."
diff --git a/backend/go/omnivoice-cpp/main.go b/backend/go/omnivoice-cpp/main.go
index 891201f49..f44eb31a7 100644
--- a/backend/go/omnivoice-cpp/main.go
+++ b/backend/go/omnivoice-cpp/main.go
@@ -4,6 +4,7 @@ package main
import (
"flag"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
func main() {
libName := os.Getenv("OMNIVOICE_LIBRARY")
if libName == "" {
- libName = "./libgomnivoicecpp-fallback.so"
+ if runtime.GOOS == "darwin" {
+ libName = "./libgomnivoicecpp-fallback.dylib"
+ } else {
+ libName = "./libgomnivoicecpp-fallback.so"
+ }
}
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/omnivoice-cpp/package.sh b/backend/go/omnivoice-cpp/package.sh
index b8313d9d7..97a8d7809 100755
--- a/backend/go/omnivoice-cpp/package.sh
+++ b/backend/go/omnivoice-cpp/package.sh
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/omnivoice-cpp $CURDIR/package/
-cp -fv $CURDIR/libgomnivoicecpp-*.so $CURDIR/package/
+cp -fv $CURDIR/libgomnivoicecpp-*.so $CURDIR/package/ 2>/dev/null || \
+    cp -fv $CURDIR/libgomnivoicecpp-*.dylib $CURDIR/package/ # no .so (macOS): the .dylib copy must succeed
cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries
diff --git a/backend/go/omnivoice-cpp/run.sh b/backend/go/omnivoice-cpp/run.sh
index f677ca21c..81ea2b719 100755
--- a/backend/go/omnivoice-cpp/run.sh
+++ b/backend/go/omnivoice-cpp/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1
fi
-LIBRARY="$CURDIR/libgomnivoicecpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+ # macOS: single dylib variant (Metal or Accelerate)
+ LIBRARY="$CURDIR/libgomnivoicecpp-fallback.dylib"
+ export DYLD_LIBRARY_PATH="$CURDIR/lib:${DYLD_LIBRARY_PATH:-}" # quoted; :- tolerates an unset variable
+else
+ LIBRARY="$CURDIR/libgomnivoicecpp-fallback.so"
-if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
if [ -e $CURDIR/libgomnivoicecpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgomnivoicecpp-avx512.so"
fi
fi
+
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export OMNIVOICE_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it
diff --git a/backend/go/parakeet-cpp/Makefile b/backend/go/parakeet-cpp/Makefile
index f9848dc34..7fc46f8e2 100644
--- a/backend/go/parakeet-cpp/Makefile
+++ b/backend/go/parakeet-cpp/Makefile
@@ -74,6 +74,7 @@ libparakeet.so: sources/parakeet.cpp
cmake -B sources/parakeet.cpp/build-shared -S sources/parakeet.cpp $(CMAKE_ARGS)
cmake --build sources/parakeet.cpp/build-shared --config Release -j$(JOBS)
cp -fv sources/parakeet.cpp/build-shared/libparakeet.so* ./ 2>/dev/null || true
+ cp -fv sources/parakeet.cpp/build-shared/libparakeet.dylib ./ 2>/dev/null || true
cp -fv sources/parakeet.cpp/include/parakeet_capi.h ./
parakeet-cpp-grpc: libparakeet.so main.go goparakeetcpp.go
diff --git a/backend/go/parakeet-cpp/main.go b/backend/go/parakeet-cpp/main.go
index 963056e23..9c6466b13 100644
--- a/backend/go/parakeet-cpp/main.go
+++ b/backend/go/parakeet-cpp/main.go
@@ -2,15 +2,17 @@ package main
// Started internally by LocalAI - one gRPC server per loaded model.
//
-// Loads libparakeet.so via purego and registers the flat C-API entry
-// points declared in parakeet_capi.h. The library name can be overridden
-// with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY / VIBEVOICECPP_LIBRARY
-// convention in the sibling backends); the default looks for the .so next
-// to this binary.
+// Loads the parakeet shared library via purego and registers the flat
+// C-API entry points declared in parakeet_capi.h. The library name can be
+// overridden with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY /
+// VIBEVOICECPP_LIBRARY convention in the sibling backends); the default
+// looks next to this binary for libparakeet.so on Linux and
+// libparakeet.dylib on macOS.
import (
"flag"
"fmt"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -28,7 +30,11 @@ type LibFuncs struct {
func main() {
libName := os.Getenv("PARAKEET_LIBRARY")
if libName == "" {
- libName = "libparakeet.so"
+ if runtime.GOOS == "darwin" {
+ libName = "libparakeet.dylib"
+ } else {
+ libName = "libparakeet.so"
+ }
}
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/parakeet-cpp/package.sh b/backend/go/parakeet-cpp/package.sh
index 0b580324c..af8e6b9e1 100755
--- a/backend/go/parakeet-cpp/package.sh
+++ b/backend/go/parakeet-cpp/package.sh
@@ -16,12 +16,15 @@ mkdir -p "$CURDIR/package/lib"
cp -avf "$CURDIR/parakeet-cpp-grpc" "$CURDIR/package/"
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
-# libparakeet.so + any soname symlinks (libparakeet.so.X[.Y]). purego.Dlopen
-# resolves it via LD_LIBRARY_PATH, which run.sh points at lib/.
-cp -avf "$CURDIR"/libparakeet.so* "$CURDIR/package/lib/" 2>/dev/null || {
- echo "ERROR: libparakeet.so not found in $CURDIR, run 'make' first" >&2
+# libparakeet shared lib + any soname symlinks. On Linux this is
+# libparakeet.so[.X.Y]; on macOS it is libparakeet.dylib. purego.Dlopen
+# resolves it via the *_LIBRARY_PATH that run.sh points at lib/.
+cp -avf "$CURDIR"/libparakeet.so* "$CURDIR/package/lib/" 2>/dev/null || true
+cp -avf "$CURDIR"/libparakeet.dylib "$CURDIR/package/lib/" 2>/dev/null || true
+if ! ls "$CURDIR"/package/lib/libparakeet.* >/dev/null 2>&1; then
+ echo "ERROR: libparakeet shared library not found in $CURDIR, run 'make' first" >&2
exit 1
-}
+fi
# Detect architecture and copy the core runtime libs libparakeet.so links
# against, plus the matching dynamic loader as lib/ld.so.
@@ -48,7 +51,7 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 "$CURDIR/package/lib/librt.so.1"
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 "$CURDIR/package/lib/libpthread.so.0"
elif [ "$(uname -s)" = "Darwin" ]; then
- echo "Detected Darwin"
+ echo "Detected Darwin — system frameworks linked dynamically, no bundled libs needed"
else
echo "Error: Could not detect architecture"
exit 1
diff --git a/backend/go/parakeet-cpp/run.sh b/backend/go/parakeet-cpp/run.sh
index 6f371d4f0..be859f381 100755
--- a/backend/go/parakeet-cpp/run.sh
+++ b/backend/go/parakeet-cpp/run.sh
@@ -3,11 +3,17 @@ set -e
CURDIR=$(dirname "$(realpath "$0")")
-export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
+if [ "$(uname)" = "Darwin" ]; then
+ export DYLD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${DYLD_LIBRARY_PATH:-}"
+ export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.dylib"
+else
+ export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
+ export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.so"
+fi
# If a self-contained ld.so was packaged, route through it so the
# packaged libc / libstdc++ are used instead of the host's (matches the
-# whisper backend's runtime layout).
+# whisper backend's runtime layout). Linux only.
if [ -f "$CURDIR/lib/ld.so" ]; then
echo "Using lib/ld.so"
exec "$CURDIR/lib/ld.so" "$CURDIR/parakeet-cpp-grpc" "$@"
diff --git a/backend/go/qwen3-tts-cpp/Makefile b/backend/go/qwen3-tts-cpp/Makefile
index 4015f790e..c2bc6de34 100644
--- a/backend/go/qwen3-tts-cpp/Makefile
+++ b/backend/go/qwen3-tts-cpp/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# qwentts.cpp version
QWEN3TTS_REPO?=https://github.com/ServeurpersoCom/qwentts.cpp
-QWEN3TTS_CPP_VERSION?=4536dcdce27c3764a93a06d6bf64026b124962f5
+QWEN3TTS_CPP_VERSION?=9dbe7ea26a01b30fccb117ae5e86807c1dc23d42
SO_TARGET?=libgoqwen3ttscpp.so
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
@@ -65,8 +65,8 @@ UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libgoqwen3ttscpp-avx.so libgoqwen3ttscpp-avx2.so libgoqwen3ttscpp-avx512.so libgoqwen3ttscpp-fallback.so
else
- # On non-Linux (e.g., Darwin), build only fallback variant
- VARIANT_TARGETS = libgoqwen3ttscpp-fallback.so
+ # On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
+ VARIANT_TARGETS = libgoqwen3ttscpp-fallback.dylib
endif
qwen3-tts-cpp: main.go goqwen3ttscpp.go $(VARIANT_TARGETS)
@@ -78,7 +78,7 @@ package: qwen3-tts-cpp
build: package
clean: purge
- rm -rf libgoqwen3ttscpp*.so package sources/qwentts.cpp qwen3-tts-cpp
+ rm -rf libgoqwen3ttscpp*.so libgoqwen3ttscpp*.dylib package sources/qwentts.cpp qwen3-tts-cpp
purge:
rm -rf build*
@@ -110,13 +110,20 @@ libgoqwen3ttscpp-fallback.so: sources/qwentts.cpp
SO_TARGET=libgoqwen3ttscpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom
rm -rf build-libgoqwen3ttscpp-fallback.so
+# Build fallback variant as a dylib (Darwin)
+libgoqwen3ttscpp-fallback.dylib: sources/qwentts.cpp
+ $(info ${GREEN}I qwen3-tts-cpp build info:fallback (dylib)${RESET})
+ SO_TARGET=libgoqwen3ttscpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom
+ rm -rf build-libgoqwen3ttscpp-fallback.dylib
+
libgoqwen3ttscpp-custom: CMakeLists.txt cpp/goqwen3ttscpp.cpp cpp/goqwen3ttscpp.h
mkdir -p build-$(SO_TARGET) && \
cd build-$(SO_TARGET) && \
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) --target goqwen3ttscpp && \
cd .. && \
- mv build-$(SO_TARGET)/libgoqwen3ttscpp.so ./$(SO_TARGET)
+ (mv build-$(SO_TARGET)/libgoqwen3ttscpp.so ./$(SO_TARGET) 2>/dev/null || \
+ mv build-$(SO_TARGET)/libgoqwen3ttscpp.dylib ./$(SO_TARGET))
test: qwen3-tts-cpp
@echo "Running qwen3-tts-cpp tests..."
diff --git a/backend/go/qwen3-tts-cpp/main.go b/backend/go/qwen3-tts-cpp/main.go
index b788229cd..041a23ad0 100644
--- a/backend/go/qwen3-tts-cpp/main.go
+++ b/backend/go/qwen3-tts-cpp/main.go
@@ -4,6 +4,7 @@ package main
import (
"flag"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
func main() {
libName := os.Getenv("QWEN3TTS_LIBRARY")
if libName == "" {
- libName = "./libgoqwen3ttscpp-fallback.so"
+ if runtime.GOOS == "darwin" {
+ libName = "./libgoqwen3ttscpp-fallback.dylib"
+ } else {
+ libName = "./libgoqwen3ttscpp-fallback.so"
+ }
}
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/qwen3-tts-cpp/package.sh b/backend/go/qwen3-tts-cpp/package.sh
index bb73df968..11d4c57c3 100755
--- a/backend/go/qwen3-tts-cpp/package.sh
+++ b/backend/go/qwen3-tts-cpp/package.sh
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/qwen3-tts-cpp $CURDIR/package/
-cp -fv $CURDIR/libgoqwen3ttscpp-*.so $CURDIR/package/
+cp -fv $CURDIR/libgoqwen3ttscpp-*.so $CURDIR/package/ 2>/dev/null || \
+    cp -fv $CURDIR/libgoqwen3ttscpp-*.dylib $CURDIR/package/ # no .so (macOS): the .dylib copy must succeed
cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries
diff --git a/backend/go/qwen3-tts-cpp/run.sh b/backend/go/qwen3-tts-cpp/run.sh
index 6416779fa..638cf9661 100755
--- a/backend/go/qwen3-tts-cpp/run.sh
+++ b/backend/go/qwen3-tts-cpp/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1
fi
-LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+ # macOS: single dylib variant (Metal or Accelerate)
+ LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.dylib"
+ export DYLD_LIBRARY_PATH="$CURDIR/lib:${DYLD_LIBRARY_PATH:-}" # quoted; :- tolerates an unset variable
+else
+ LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.so"
-if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
if [ -e $CURDIR/libgoqwen3ttscpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgoqwen3ttscpp-avx512.so"
fi
fi
+
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export QWEN3TTS_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it
diff --git a/backend/go/rfdetr-cpp/Makefile b/backend/go/rfdetr-cpp/Makefile
index 7c598f732..3282720ff 100644
--- a/backend/go/rfdetr-cpp/Makefile
+++ b/backend/go/rfdetr-cpp/Makefile
@@ -71,7 +71,7 @@ ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = librfdetrcpp-avx.so librfdetrcpp-avx2.so librfdetrcpp-avx512.so librfdetrcpp-fallback.so
else
# On non-Linux (e.g., Darwin), build only fallback variant
- VARIANT_TARGETS = librfdetrcpp-fallback.so
+ VARIANT_TARGETS = librfdetrcpp-fallback.dylib
endif
rfdetr-cpp: main.go gorfdetrcpp.go $(VARIANT_TARGETS)
@@ -83,7 +83,7 @@ package: rfdetr-cpp
build: package
clean: purge
- rm -rf librfdetrcpp*.so rfdetr-cpp package sources
+ rm -rf librfdetrcpp*.so librfdetrcpp*.dylib rfdetr-cpp package sources
purge:
rm -rf build*
@@ -110,11 +110,19 @@ librfdetrcpp-avx512.so: sources/rt-detr.cpp
endif
# Build fallback variant (all platforms)
+ifeq ($(UNAME_S),Darwin)
+librfdetrcpp-fallback.dylib: sources/rt-detr.cpp
+ rm -rfv build-$@
+ $(info ${GREEN}I rfdetr-cpp build info:fallback${RESET})
+ SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom
+ rm -rfv build-$@
+else
librfdetrcpp-fallback.so: sources/rt-detr.cpp
rm -rfv build-$@
$(info ${GREEN}I rfdetr-cpp build info:fallback${RESET})
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom
rm -rfv build-$@
+endif
librfdetrcpp-custom: CMakeLists.txt
mkdir -p build-$(SO_TARGET) && \
@@ -122,7 +130,8 @@ librfdetrcpp-custom: CMakeLists.txt
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \
cd .. && \
- mv build-$(SO_TARGET)/librfdetrcpp.so ./$(SO_TARGET)
+ (mv build-$(SO_TARGET)/librfdetrcpp.so ./$(SO_TARGET) 2>/dev/null || \
+ mv build-$(SO_TARGET)/librfdetrcpp.dylib ./$(SO_TARGET))
all: rfdetr-cpp package
diff --git a/backend/go/rfdetr-cpp/main.go b/backend/go/rfdetr-cpp/main.go
index 3c95df1c2..58637122a 100644
--- a/backend/go/rfdetr-cpp/main.go
+++ b/backend/go/rfdetr-cpp/main.go
@@ -9,6 +9,7 @@ package main
import (
"flag"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -27,7 +28,11 @@ func main() {
// Get library name from environment variable, default to fallback
libName := os.Getenv("RFDETR_LIBRARY")
if libName == "" {
- libName = "./librfdetrcpp-fallback.so"
+ if runtime.GOOS == "darwin" {
+ libName = "./librfdetrcpp-fallback.dylib"
+ } else {
+ libName = "./librfdetrcpp-fallback.so"
+ }
}
rfdetrLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/rfdetr-cpp/package.sh b/backend/go/rfdetr-cpp/package.sh
index 9591b79dc..17319bf27 100755
--- a/backend/go/rfdetr-cpp/package.sh
+++ b/backend/go/rfdetr-cpp/package.sh
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
# Create lib directory
mkdir -p $CURDIR/package/lib
-cp -avf $CURDIR/librfdetrcpp-*.so $CURDIR/package/
+cp -fv $CURDIR/librfdetrcpp-*.so $CURDIR/package/ 2>/dev/null || \
+    cp -fv $CURDIR/librfdetrcpp-*.dylib $CURDIR/package/ # no .so (macOS): the .dylib copy must succeed
cp -avf $CURDIR/rfdetr-cpp $CURDIR/package/
cp -fv $CURDIR/run.sh $CURDIR/package/
diff --git a/backend/go/rfdetr-cpp/run.sh b/backend/go/rfdetr-cpp/run.sh
index 042904e45..ffbd604dd 100755
--- a/backend/go/rfdetr-cpp/run.sh
+++ b/backend/go/rfdetr-cpp/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1
fi
-LIBRARY="$CURDIR/librfdetrcpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+ # macOS: single dylib variant (Metal or Accelerate)
+ LIBRARY="$CURDIR/librfdetrcpp-fallback.dylib"
+ export DYLD_LIBRARY_PATH="$CURDIR/lib:${DYLD_LIBRARY_PATH:-}" # quoted; :- tolerates an unset variable
+else
+ LIBRARY="$CURDIR/librfdetrcpp-fallback.so"
-if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
if [ -e $CURDIR/librfdetrcpp-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/librfdetrcpp-avx512.so"
fi
fi
+
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export RFDETR_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it
diff --git a/backend/go/sam3-cpp/Makefile b/backend/go/sam3-cpp/Makefile
index 53b0dfb5e..27b6cedf7 100644
--- a/backend/go/sam3-cpp/Makefile
+++ b/backend/go/sam3-cpp/Makefile
@@ -66,7 +66,7 @@ ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libgosam3-avx.so libgosam3-avx2.so libgosam3-avx512.so libgosam3-fallback.so
else
# On non-Linux (e.g., Darwin), build only fallback variant
- VARIANT_TARGETS = libgosam3-fallback.so
+ VARIANT_TARGETS = libgosam3-fallback.dylib
endif
sam3-cpp: main.go gosam3.go $(VARIANT_TARGETS)
@@ -78,7 +78,7 @@ package: sam3-cpp
build: package
clean: purge
- rm -rf libgosam3*.so sam3-cpp package sources
+ rm -rf libgosam3*.so libgosam3*.dylib sam3-cpp package sources
purge:
rm -rf build*
@@ -105,11 +105,19 @@ libgosam3-avx512.so: sources/sam3.cpp
endif
# Build fallback variant (all platforms)
+ifeq ($(UNAME_S),Darwin)
+libgosam3-fallback.dylib: sources/sam3.cpp
+ $(MAKE) purge
+ $(info ${GREEN}I sam3-cpp build info:fallback${RESET})
+ SO_TARGET=libgosam3-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
+ rm -rfv build*
+else
libgosam3-fallback.so: sources/sam3.cpp
$(MAKE) purge
$(info ${GREEN}I sam3-cpp build info:fallback${RESET})
SO_TARGET=libgosam3-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
rm -rfv build*
+endif
libgosam3-custom: CMakeLists.txt cpp/gosam3.cpp cpp/gosam3.h
mkdir -p build-$(SO_TARGET) && \
@@ -117,6 +125,7 @@ libgosam3-custom: CMakeLists.txt cpp/gosam3.cpp cpp/gosam3.h
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \
cd .. && \
- mv build-$(SO_TARGET)/libgosam3.so ./$(SO_TARGET)
+ (mv build-$(SO_TARGET)/libgosam3.so ./$(SO_TARGET) 2>/dev/null || \
+ mv build-$(SO_TARGET)/libgosam3.dylib ./$(SO_TARGET))
all: sam3-cpp package
diff --git a/backend/go/sam3-cpp/main.go b/backend/go/sam3-cpp/main.go
index c83a59285..e36849f69 100644
--- a/backend/go/sam3-cpp/main.go
+++ b/backend/go/sam3-cpp/main.go
@@ -3,6 +3,7 @@ package main
import (
"flag"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ func main() {
// Get library name from environment variable, default to fallback
libName := os.Getenv("SAM3_LIBRARY")
if libName == "" {
- libName = "./libgosam3-fallback.so"
+ if runtime.GOOS == "darwin" {
+ libName = "./libgosam3-fallback.dylib"
+ } else {
+ libName = "./libgosam3-fallback.so"
+ }
}
gosamLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/sam3-cpp/package.sh b/backend/go/sam3-cpp/package.sh
index 254aef286..a648ee93c 100755
--- a/backend/go/sam3-cpp/package.sh
+++ b/backend/go/sam3-cpp/package.sh
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
# Create lib directory
mkdir -p $CURDIR/package/lib
-cp -avf $CURDIR/libgosam3-*.so $CURDIR/package/
+cp -fv $CURDIR/libgosam3-*.so $CURDIR/package/ 2>/dev/null || \
+    cp -fv $CURDIR/libgosam3-*.dylib $CURDIR/package/ # no .so (macOS): the .dylib copy must succeed
cp -avf $CURDIR/sam3-cpp $CURDIR/package/
cp -fv $CURDIR/run.sh $CURDIR/package/
diff --git a/backend/go/sam3-cpp/run.sh b/backend/go/sam3-cpp/run.sh
index 423ed9199..7bff52df6 100755
--- a/backend/go/sam3-cpp/run.sh
+++ b/backend/go/sam3-cpp/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1
fi
-LIBRARY="$CURDIR/libgosam3-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+ # macOS: single dylib variant (Metal or Accelerate)
+ LIBRARY="$CURDIR/libgosam3-fallback.dylib"
+ export DYLD_LIBRARY_PATH="$CURDIR/lib:${DYLD_LIBRARY_PATH:-}" # quoted; :- tolerates an unset variable
+else
+ LIBRARY="$CURDIR/libgosam3-fallback.so"
-if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
if [ -e $CURDIR/libgosam3-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgosam3-avx512.so"
fi
fi
+
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export SAM3_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it
diff --git a/backend/go/sherpa-onnx/backend.go b/backend/go/sherpa-onnx/backend.go
index 0a092acf7..8bfe5e75c 100644
--- a/backend/go/sherpa-onnx/backend.go
+++ b/backend/go/sherpa-onnx/backend.go
@@ -7,6 +7,7 @@ import (
"fmt"
"os"
"path/filepath"
+ "runtime"
"strconv"
"strings"
"sync"
@@ -238,11 +239,19 @@ func loadSherpaLibs() error {
func loadSherpaLibsOnce() error {
shimLib := os.Getenv("SHERPA_SHIM_LIBRARY")
if shimLib == "" {
- shimLib = "libsherpa-shim.so"
+ if runtime.GOOS == "darwin" {
+ shimLib = "libsherpa-shim.dylib"
+ } else {
+ shimLib = "libsherpa-shim.so"
+ }
}
capiLib := os.Getenv("SHERPA_ONNX_LIBRARY")
if capiLib == "" {
- capiLib = "libsherpa-onnx-c-api.so"
+ if runtime.GOOS == "darwin" {
+ capiLib = "libsherpa-onnx-c-api.dylib"
+ } else {
+ capiLib = "libsherpa-onnx-c-api.so"
+ }
}
shim, err := purego.Dlopen(shimLib, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/sherpa-onnx/run.sh b/backend/go/sherpa-onnx/run.sh
index b703e5155..771324326 100755
--- a/backend/go/sherpa-onnx/run.sh
+++ b/backend/go/sherpa-onnx/run.sh
@@ -3,7 +3,13 @@ set -ex
CURDIR=$(dirname "$(realpath $0)")
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
+if [ "$(uname)" = "Darwin" ]; then
+ export DYLD_LIBRARY_PATH="$CURDIR/lib:${DYLD_LIBRARY_PATH:-}" # quoted; :- tolerates an unset variable
+ export SHERPA_SHIM_LIBRARY="$CURDIR/lib/libsherpa-shim.dylib"
+ export SHERPA_ONNX_LIBRARY="$CURDIR/lib/libsherpa-onnx-c-api.dylib"
+else
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
+fi
if [ -f $CURDIR/lib/ld.so ]; then
echo "Using lib/ld.so"
diff --git a/backend/go/stablediffusion-ggml/Makefile b/backend/go/stablediffusion-ggml/Makefile
index 05b57b254..7a9917ea8 100644
--- a/backend/go/stablediffusion-ggml/Makefile
+++ b/backend/go/stablediffusion-ggml/Makefile
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=f440ad9c29dd8bc34e5d1f4b863832b96d6ea05f
+STABLEDIFFUSION_GGML_VERSION?=8caa3f908ae6d4a4bef531e73b9a969f266a3d1f
CMAKE_ARGS+=-DGGML_MAX_NAME=128
@@ -131,6 +131,7 @@ libgosd-custom: CMakeLists.txt cpp/gosd.cpp cpp/gosd.h
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \
cd .. && \
- mv build-$(SO_TARGET)/libgosd.so ./$(SO_TARGET)
+ (mv build-$(SO_TARGET)/libgosd.so ./$(SO_TARGET) 2>/dev/null || \
+ mv build-$(SO_TARGET)/libgosd.dylib ./$(SO_TARGET))
all: stablediffusion-ggml package
\ No newline at end of file
diff --git a/backend/go/stablediffusion-ggml/main.go b/backend/go/stablediffusion-ggml/main.go
index 998f2a5ab..b509c6a2b 100644
--- a/backend/go/stablediffusion-ggml/main.go
+++ b/backend/go/stablediffusion-ggml/main.go
@@ -3,6 +3,7 @@ package main
import (
"flag"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ func main() {
// Get library name from environment variable, default to fallback
libName := os.Getenv("SD_LIBRARY")
if libName == "" {
- libName = "./libgosd-fallback.so"
+ if runtime.GOOS == "darwin" {
+ libName = "./libgosd-fallback.dylib"
+ } else {
+ libName = "./libgosd-fallback.so"
+ }
}
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/stablediffusion-ggml/package.sh b/backend/go/stablediffusion-ggml/package.sh
index 8006e081f..922fb71ea 100755
--- a/backend/go/stablediffusion-ggml/package.sh
+++ b/backend/go/stablediffusion-ggml/package.sh
@@ -12,6 +12,7 @@ REPO_ROOT="${CURDIR}/../../.."
mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/libgosd-*.so $CURDIR/package/
+cp -fv $CURDIR/libgosd-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -avf $CURDIR/stablediffusion-ggml $CURDIR/package/
cp -fv $CURDIR/run.sh $CURDIR/package/
diff --git a/backend/go/stablediffusion-ggml/run.sh b/backend/go/stablediffusion-ggml/run.sh
index 71342e43b..e026b4b28 100755
--- a/backend/go/stablediffusion-ggml/run.sh
+++ b/backend/go/stablediffusion-ggml/run.sh
@@ -12,9 +12,18 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1
fi
-LIBRARY="$CURDIR/libgosd-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+ # macOS: single library variant (Metal or Accelerate). gosd is built as a
+ # CMake MODULE library; on Apple, CMake names MODULE libraries .so and only
+ # SHARED libraries .dylib, so prefer .dylib and fall back to .so.
+ LIBRARY="$CURDIR/libgosd-fallback.dylib"
+ if [ ! -e "$LIBRARY" ]; then
+ LIBRARY="$CURDIR/libgosd-fallback.so"
+ fi
+ export DYLD_LIBRARY_PATH="$CURDIR/lib:${DYLD_LIBRARY_PATH:-}" # quoted; :- tolerates an unset variable
+else
+ LIBRARY="$CURDIR/libgosd-fallback.so"
-if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
if [ -e $CURDIR/libgosd-avx.so ]; then
@@ -36,9 +45,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgosd-avx512.so"
fi
fi
+
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export SD_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it
diff --git a/backend/go/vibevoice-cpp/Makefile b/backend/go/vibevoice-cpp/Makefile
index 199df9cc4..dc71eaa5d 100644
--- a/backend/go/vibevoice-cpp/Makefile
+++ b/backend/go/vibevoice-cpp/Makefile
@@ -70,8 +70,8 @@ UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
VARIANT_TARGETS = libgovibevoicecpp-avx.so libgovibevoicecpp-avx2.so libgovibevoicecpp-avx512.so libgovibevoicecpp-fallback.so
else
- # On non-Linux (e.g., Darwin), build only fallback variant
- VARIANT_TARGETS = libgovibevoicecpp-fallback.so
+ # On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
+ VARIANT_TARGETS = libgovibevoicecpp-fallback.dylib
endif
vibevoice-cpp: main.go govibevoicecpp.go $(VARIANT_TARGETS)
@@ -83,7 +83,7 @@ package: vibevoice-cpp
build: package
clean: purge
- rm -rf libgovibevoicecpp*.so package sources/vibevoice.cpp vibevoice-cpp
+ rm -rf libgovibevoicecpp*.so libgovibevoicecpp*.dylib package sources/vibevoice.cpp vibevoice-cpp
purge:
rm -rf build*
@@ -119,13 +119,21 @@ libgovibevoicecpp-fallback.so: sources/vibevoice.cpp
SO_TARGET=libgovibevoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgovibevoicecpp-custom
rm -rfv build*
+# Build fallback variant as a dylib (Darwin)
+libgovibevoicecpp-fallback.dylib: sources/vibevoice.cpp
+ $(MAKE) purge
+ $(info ${GREEN}I vibevoice-cpp build info:fallback (dylib)${RESET})
+ SO_TARGET=libgovibevoicecpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgovibevoicecpp-custom
+ rm -rfv build*
+
libgovibevoicecpp-custom: CMakeLists.txt cpp/govibevoicecpp.cpp cpp/govibevoicecpp.h
mkdir -p build-$(SO_TARGET) && \
cd build-$(SO_TARGET) && \
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) --target govibevoicecpp && \
cd .. && \
- mv build-$(SO_TARGET)/libgovibevoicecpp.so ./$(SO_TARGET)
+ (mv build-$(SO_TARGET)/libgovibevoicecpp.so ./$(SO_TARGET) 2>/dev/null || \
+ mv build-$(SO_TARGET)/libgovibevoicecpp.dylib ./$(SO_TARGET))
test: vibevoice-cpp
@echo "Running vibevoice-cpp tests..."
diff --git a/backend/go/vibevoice-cpp/main.go b/backend/go/vibevoice-cpp/main.go
index dd1f1ba43..b9a696d82 100644
--- a/backend/go/vibevoice-cpp/main.go
+++ b/backend/go/vibevoice-cpp/main.go
@@ -4,6 +4,7 @@ package main
import (
"flag"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -21,7 +22,11 @@ type LibFuncs struct {
func main() {
libName := os.Getenv("VIBEVOICECPP_LIBRARY")
if libName == "" {
- libName = "./libgovibevoicecpp-fallback.so"
+ if runtime.GOOS == "darwin" {
+ libName = "./libgovibevoicecpp-fallback.dylib"
+ } else {
+ libName = "./libgovibevoicecpp-fallback.so"
+ }
}
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/vibevoice-cpp/package.sh b/backend/go/vibevoice-cpp/package.sh
index 88010846f..62860b8d6 100755
--- a/backend/go/vibevoice-cpp/package.sh
+++ b/backend/go/vibevoice-cpp/package.sh
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/vibevoice-cpp $CURDIR/package/
-cp -fv $CURDIR/libgovibevoicecpp-*.so $CURDIR/package/
+cp -fv $CURDIR/libgovibevoicecpp-*.so $CURDIR/package/ 2>/dev/null || \
+    cp -fv $CURDIR/libgovibevoicecpp-*.dylib $CURDIR/package/ # no .so (macOS): the .dylib copy must succeed
cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries
diff --git a/backend/go/vibevoice-cpp/run.sh b/backend/go/vibevoice-cpp/run.sh
index 93e92d5b8..ec5a39c14 100755
--- a/backend/go/vibevoice-cpp/run.sh
+++ b/backend/go/vibevoice-cpp/run.sh
@@ -11,9 +11,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1
fi
-LIBRARY="$CURDIR/libgovibevoicecpp-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+ # macOS: single dylib variant (Metal or Accelerate)
+ LIBRARY="$CURDIR/libgovibevoicecpp-fallback.dylib"
+ export DYLD_LIBRARY_PATH="$CURDIR/lib:${DYLD_LIBRARY_PATH:-}" # quoted; :- tolerates an unset variable
+else
+ LIBRARY="$CURDIR/libgovibevoicecpp-fallback.so"
-if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
if [ -e $CURDIR/libgovibevoicecpp-avx.so ]; then
@@ -34,9 +38,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgovibevoicecpp-avx512.so"
fi
fi
+
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export VIBEVOICECPP_LIBRARY=$LIBRARY
if [ -f $CURDIR/lib/ld.so ]; then
diff --git a/backend/go/whisper/Makefile b/backend/go/whisper/Makefile
index e8ad8545f..6dd13dd2c 100644
--- a/backend/go/whisper/Makefile
+++ b/backend/go/whisper/Makefile
@@ -117,6 +117,7 @@ libgowhisper-custom: CMakeLists.txt cpp/gowhisper.cpp cpp/gowhisper.h
cmake .. $(CMAKE_ARGS) && \
cmake --build . --config Release -j$(JOBS) && \
cd .. && \
- mv build-$(SO_TARGET)/libgowhisper.so ./$(SO_TARGET)
+ mv build-$(SO_TARGET)/libgowhisper.so ./$(SO_TARGET) 2>/dev/null || \
+ mv build-$(SO_TARGET)/libgowhisper.dylib ./$(SO_TARGET:.so=.dylib)
all: whisper package
diff --git a/backend/go/whisper/main.go b/backend/go/whisper/main.go
index e48b24519..ab102f4c4 100644
--- a/backend/go/whisper/main.go
+++ b/backend/go/whisper/main.go
@@ -4,6 +4,7 @@ package main
import (
"flag"
"os"
+ "runtime"
"github.com/ebitengine/purego"
grpc "github.com/mudler/LocalAI/pkg/grpc"
@@ -22,7 +23,11 @@ func main() {
// Get library name from environment variable, default to fallback
libName := os.Getenv("WHISPER_LIBRARY")
if libName == "" {
- libName = "./libgowhisper-fallback.so"
+ if runtime.GOOS == "darwin" {
+ libName = "./libgowhisper-fallback.dylib"
+ } else {
+ libName = "./libgowhisper-fallback.so"
+ }
}
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
diff --git a/backend/go/whisper/package.sh b/backend/go/whisper/package.sh
index dfecdf5c6..efeaa7009 100755
--- a/backend/go/whisper/package.sh
+++ b/backend/go/whisper/package.sh
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
mkdir -p $CURDIR/package/lib
cp -avf $CURDIR/whisper $CURDIR/package/
-cp -fv $CURDIR/libgowhisper-*.so $CURDIR/package/
+cp -fv $CURDIR/libgowhisper-*.so $CURDIR/package/ 2>/dev/null || true
+cp -fv $CURDIR/libgowhisper-*.dylib $CURDIR/package/ 2>/dev/null || true
cp -fv $CURDIR/run.sh $CURDIR/package/
# Detect architecture and copy appropriate libraries
diff --git a/backend/go/whisper/run.sh b/backend/go/whisper/run.sh
index 1af2c0535..0e2bd7eb0 100755
--- a/backend/go/whisper/run.sh
+++ b/backend/go/whisper/run.sh
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
grep -e "flags" /proc/cpuinfo | head -1
fi
-LIBRARY="$CURDIR/libgowhisper-fallback.so"
+if [ "$(uname)" = "Darwin" ]; then
+ # macOS: single dylib variant (Metal or Accelerate)
+ LIBRARY="$CURDIR/libgowhisper-fallback.dylib"
+ export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
+else
+ LIBRARY="$CURDIR/libgowhisper-fallback.so"
-if [ "$(uname)" != "Darwin" ]; then
if grep -q -e "\savx\s" /proc/cpuinfo ; then
echo "CPU: AVX found OK"
if [ -e $CURDIR/libgowhisper-avx.so ]; then
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
LIBRARY="$CURDIR/libgowhisper-avx512.so"
fi
fi
+
+ export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
fi
-export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
export WHISPER_LIBRARY=$LIBRARY
# If there is a lib/ld.so, use it
diff --git a/backend/index.yaml b/backend/index.yaml
index 729de2abc..4a7a07d82 100644
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -5307,6 +5307,7 @@
nvidia: "cuda12-trl"
nvidia-cuda-12: "cuda12-trl"
nvidia-cuda-13: "cuda13-trl"
+ metal: "metal-trl"
## TRL backend images
- !!merge <<: *trl
name: "cpu-trl"
@@ -5338,6 +5339,16 @@
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-trl"
mirrors:
- localai/localai-backends:master-gpu-nvidia-cuda-13-trl
+- !!merge <<: *trl
+ name: "metal-trl"
+ uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-trl"
+ mirrors:
+ - localai/localai-backends:latest-metal-darwin-arm64-trl
+- !!merge <<: *trl
+ name: "metal-trl-development"
+ uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-trl"
+ mirrors:
+ - localai/localai-backends:master-metal-darwin-arm64-trl
## llama.cpp quantization backend
- &llama-cpp-quantization
name: "llama-cpp-quantization"
diff --git a/backend/python/trl/install.sh b/backend/python/trl/install.sh
index 6963e60ed..ce0552f87 100644
--- a/backend/python/trl/install.sh
+++ b/backend/python/trl/install.sh
@@ -8,7 +8,13 @@ else
source $backend_dir/../common/libbackend.sh
fi
-EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
+EXTRA_PIP_INSTALL_FLAGS+=" --upgrade"
+# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
+# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
+# it when uv is the installer, keeping the Linux/CUDA resolution unchanged.
+if [ "x${USE_PIP:-}" != "xtrue" ]; then
+ EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
+fi
installRequirements
# Fetch convert_hf_to_gguf.py and gguf package from the same llama.cpp version
diff --git a/backend/python/trl/requirements-mps.txt b/backend/python/trl/requirements-mps.txt
new file mode 100644
index 000000000..fbdfb6536
--- /dev/null
+++ b/backend/python/trl/requirements-mps.txt
@@ -0,0 +1,12 @@
+torch==2.10.0
+trl
+peft
+datasets>=3.0.0
+transformers>=4.56.2
+accelerate>=1.4.0
+huggingface-hub>=1.3.0
+sentencepiece
+# Note: bitsandbytes is intentionally omitted on MPS. It is only used by the
+# CUDA (cublas) variants for 8-bit/4-bit quantization and has poor support on
+# Apple Silicon. torch here uses the plain PyPI wheels, which ship MPS support
+# on macOS arm64.
diff --git a/core/cli/run.go b/core/cli/run.go
index abb0cdbf1..fd7ba8cd9 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -140,7 +140,7 @@ type RunCMD struct {
OIDCIssuer string `env:"LOCALAI_OIDC_ISSUER" help:"OIDC issuer URL for auto-discovery" group:"auth"`
OIDCClientID string `env:"LOCALAI_OIDC_CLIENT_ID" help:"OIDC Client ID (auto-enables auth)" group:"auth"`
OIDCClientSecret string `env:"LOCALAI_OIDC_CLIENT_SECRET" help:"OIDC Client Secret" group:"auth"`
- AuthBaseURL string `env:"LOCALAI_BASE_URL" help:"Base URL for OAuth callbacks (e.g. http://localhost:8080)" group:"auth"`
+ ExternalBaseURL string `env:"LOCALAI_BASE_URL" help:"External base URL of this instance (e.g. https://localhost:8080). Used for OAuth callbacks and self-referential links (generated images/videos, job status). When unset, derived from X-Forwarded-Proto/Host or Forwarded headers." group:"api"`
AuthAdminEmail string `env:"LOCALAI_ADMIN_EMAIL" help:"Email address to auto-promote to admin role" group:"auth"`
AuthRegistrationMode string `env:"LOCALAI_REGISTRATION_MODE" default:"open" help:"Registration mode: 'open' (default), 'approval', or 'invite' (invite code required)" group:"auth"`
DisableLocalAuth bool `env:"LOCALAI_DISABLE_LOCAL_AUTH" default:"false" help:"Disable local email/password registration and login (use with OAuth/OIDC-only setups)" group:"auth"`
@@ -503,9 +503,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
opts = append(opts, config.WithAuthOIDCClientID(r.OIDCClientID))
opts = append(opts, config.WithAuthOIDCClientSecret(r.OIDCClientSecret))
}
- if r.AuthBaseURL != "" {
- opts = append(opts, config.WithAuthBaseURL(r.AuthBaseURL))
- }
if r.AuthAdminEmail != "" {
opts = append(opts, config.WithAuthAdminEmail(r.AuthAdminEmail))
}
@@ -523,6 +520,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
}
}
+ // Applied unconditionally: the external base URL governs all self-referential
+ // links (not just OAuth callbacks), so it must take effect even when auth is off.
+ if r.ExternalBaseURL != "" {
+ opts = append(opts, config.WithExternalBaseURL(r.ExternalBaseURL))
+ }
+
if idleWatchDog || busyWatchDog {
opts = append(opts, config.EnableWatchDog)
if idleWatchDog {
diff --git a/core/config/application_config.go b/core/config/application_config.go
index 87acd6bd5..1821a8441 100644
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -49,6 +49,13 @@ type ApplicationConfig struct {
P2PNetworkID string
Federated bool
+ // ExternalBaseURL is the externally visible base URL of this instance
+ // (scheme+host[:port]), set via LOCALAI_BASE_URL. When non-empty it is
+ // authoritative for every self-referential URL LocalAI emits (OAuth
+ // callbacks, generated image/video links, async job StatusURLs),
+ // overriding proxy-header detection. Empty = derive from request headers.
+ ExternalBaseURL string
+
// DisableStats turns off per-request token tracking. By default the
// routing module's billing recorder runs in every mode (including
// no-auth single-user) so dashboards and `/api/usage` are immediately
@@ -196,7 +203,6 @@ type AuthConfig struct {
OIDCIssuer string // OIDC issuer URL for auto-discovery (e.g. https://accounts.google.com)
OIDCClientID string
OIDCClientSecret string
- BaseURL string // for OAuth callback URLs (e.g. "http://localhost:8080")
AdminEmail string // auto-promote to admin on login
RegistrationMode string // "open", "approval" (default when empty), "invite"
DisableLocalAuth bool // disable local email/password registration and login
@@ -950,9 +956,9 @@ func WithAuthGitHubClientSecret(clientSecret string) AppOption {
}
}
-func WithAuthBaseURL(baseURL string) AppOption {
+func WithExternalBaseURL(url string) AppOption {
return func(o *ApplicationConfig) {
- o.Auth.BaseURL = baseURL
+ o.ExternalBaseURL = url
}
}
diff --git a/core/http/app.go b/core/http/app.go
index 9ec0711fb..ee5cd99eb 100644
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -149,6 +149,18 @@ func API(application *application.Application) (*echo.Echo, error) {
// Middleware - StripPathPrefix must be registered early as it uses Rewrite which runs before routing
e.Pre(httpMiddleware.StripPathPrefix())
+ // Stamp the configured external base URL into each request context so
+ // middleware.BaseURL can treat it as authoritative for self-referential
+ // links. Registered as Pre so it runs before routing and handlers.
+ if extBaseURL := application.ApplicationConfig().ExternalBaseURL; extBaseURL != "" {
+ e.Pre(func(next echo.HandlerFunc) echo.HandlerFunc {
+ return func(c echo.Context) error {
+ c.Set("_external_base_url", extBaseURL)
+ return next(c)
+ }
+ })
+ }
+
e.Pre(middleware.RemoveTrailingSlash())
if application.ApplicationConfig().MachineTag != "" {
diff --git a/core/http/middleware/baseurl.go b/core/http/middleware/baseurl.go
index a1e1844ae..84f72cf69 100644
--- a/core/http/middleware/baseurl.go
+++ b/core/http/middleware/baseurl.go
@@ -55,17 +55,70 @@ func BasePathPrefix(c echo.Context) string {
// The returned URL is guaranteed to end with `/`.
// The method should be used in conjunction with the StripPathPrefix middleware.
func BaseURL(c echo.Context) string {
+ // An explicit external base URL (LOCALAI_BASE_URL) is authoritative for
+ // the origin. The proxy-derived path prefix is still appended so a
+ // reverse-proxy mount point keeps working. Trailing slashes are
+ // normalized via BasePathPrefix, which always starts and ends with "/".
+ if ext, ok := c.Get("_external_base_url").(string); ok && ext != "" {
+ return strings.TrimRight(ext, "/") + BasePathPrefix(c)
+ }
+
+ fwdProto, fwdHost := parseForwarded(c.Request().Header.Get("Forwarded"))
+
scheme := "http"
- if c.Request().Header.Get("X-Forwarded-Proto") == "https" {
+ switch {
+ case c.Request().TLS != nil:
scheme = "https"
- } else if c.Request().TLS != nil {
+ case strings.EqualFold(firstToken(c.Request().Header.Get("X-Forwarded-Proto")), "https"):
+ scheme = "https"
+ case strings.EqualFold(fwdProto, "https"):
scheme = "https"
}
host := c.Request().Host
- if forwardedHost := c.Request().Header.Get("X-Forwarded-Host"); forwardedHost != "" {
+ // Chained proxies may append to X-Forwarded-Host ("a.example, b.example");
+ // keep only the first, client-facing entry, mirroring X-Forwarded-Proto.
+ if forwardedHost := firstToken(c.Request().Header.Get("X-Forwarded-Host")); forwardedHost != "" {
host = forwardedHost
+ } else if fwdHost != "" {
+ host = fwdHost
}
return scheme + "://" + host + BasePathPrefix(c)
}
+
+// firstToken yields the text before the first comma in v with surrounding
+// whitespace removed; without a comma the whole trimmed string is returned.
+// Proxy chains may send X-Forwarded-Proto as a list such as "https,http", and
+// only the leading entry (the hop nearest the client) decides the scheme.
+func firstToken(v string) string {
+	head, _, _ := strings.Cut(v, ",")
+	return strings.TrimSpace(head)
+}
+
+// parseForwarded reads the proto and host parameters out of the first element
+// of an RFC 7239 Forwarded header (e.g. `for=x;proto=https;host=h, for=y`).
+// Parameter names are matched case-insensitively and surrounding double quotes
+// are removed from values. Anything missing or malformed comes back as an
+// empty string so the caller can fall through to other signals.
+func parseForwarded(header string) (proto, host string) {
+	// Later elements describe hops further from the client; drop them. An empty
+	// header simply yields one pair without "=", which the loop skips.
+	element, _, _ := strings.Cut(header, ",")
+	for _, pair := range strings.Split(element, ";") {
+		name, raw, found := strings.Cut(strings.TrimSpace(pair), "=")
+		if !found {
+			continue
+		}
+		name = strings.ToLower(strings.TrimSpace(name))
+		unquoted := strings.Trim(strings.TrimSpace(raw), `"`)
+		if name == "proto" {
+			proto = unquoted
+		} else if name == "host" {
+			host = unquoted
+		}
+	}
+	return proto, host
+}
diff --git a/core/http/middleware/baseurl_test.go b/core/http/middleware/baseurl_test.go
index 4f6dbb1d1..6a132514b 100644
--- a/core/http/middleware/baseurl_test.go
+++ b/core/http/middleware/baseurl_test.go
@@ -135,4 +135,138 @@ var _ = Describe("BaseURL", func() {
Entry("missing leading slash", "evil"),
)
})
+
+ Context("scheme detection hardening", func() {
+ It("treats comma-separated X-Forwarded-Proto as https when first token is https", func() {
+ app := echo.New()
+ actualURL := ""
+ app.GET("/x", func(c echo.Context) error {
+ actualURL = BaseURL(c)
+ return nil
+ })
+ req := httptest.NewRequest("GET", "/x", nil)
+ req.Header.Set("X-Forwarded-Proto", "https,http")
+ rec := httptest.NewRecorder()
+ app.ServeHTTP(rec, req)
+ Expect(actualURL).To(Equal("https://example.com/"))
+ })
+
+ It("derives https from the RFC 7239 Forwarded proto directive", func() {
+ app := echo.New()
+ actualURL := ""
+ app.GET("/x", func(c echo.Context) error {
+ actualURL = BaseURL(c)
+ return nil
+ })
+ req := httptest.NewRequest("GET", "/x", nil)
+ req.Header.Set("Forwarded", "for=192.0.2.1;proto=https;host=proxy.example")
+ rec := httptest.NewRecorder()
+ app.ServeHTTP(rec, req)
+ Expect(actualURL).To(Equal("https://proxy.example/"))
+ })
+
+ It("prefers X-Forwarded-Host over the Forwarded host directive", func() {
+ app := echo.New()
+ actualURL := ""
+ app.GET("/x", func(c echo.Context) error {
+ actualURL = BaseURL(c)
+ return nil
+ })
+ req := httptest.NewRequest("GET", "/x", nil)
+ req.Header.Set("X-Forwarded-Host", "xfh.example")
+ req.Header.Set("Forwarded", "host=fwd.example;proto=https")
+ rec := httptest.NewRecorder()
+ app.ServeHTTP(rec, req)
+ Expect(actualURL).To(Equal("https://xfh.example/"))
+ })
+ })
+
+ Context("explicit external base URL override", func() {
+ It("uses the configured origin over conflicting forwarded headers", func() {
+ app := echo.New()
+ actualURL := ""
+ app.GET("/x", func(c echo.Context) error {
+ c.Set("_external_base_url", "https://192.168.0.13:34567")
+ actualURL = BaseURL(c)
+ return nil
+ })
+ req := httptest.NewRequest("GET", "/x", nil)
+ req.Header.Set("X-Forwarded-Proto", "http")
+ req.Header.Set("X-Forwarded-Host", "internal:8080")
+ rec := httptest.NewRecorder()
+ app.ServeHTTP(rec, req)
+ Expect(actualURL).To(Equal("https://192.168.0.13:34567/"))
+ })
+
+ It("combines the configured origin with a detected path prefix", func() {
+ app := echo.New()
+ actualURL := ""
+ app.GET("/hello", func(c echo.Context) error {
+ c.Set("_original_path", "/localai/hello")
+ c.Set("_external_base_url", "https://ext.example")
+ actualURL = BaseURL(c)
+ return nil
+ })
+ req := httptest.NewRequest("GET", "/hello", nil)
+ rec := httptest.NewRecorder()
+ app.ServeHTTP(rec, req)
+ Expect(actualURL).To(Equal("https://ext.example/localai/"))
+ })
+
+ It("ignores an empty override", func() {
+ app := echo.New()
+ actualURL := ""
+ app.GET("/x", func(c echo.Context) error {
+ c.Set("_external_base_url", "")
+ actualURL = BaseURL(c)
+ return nil
+ })
+ req := httptest.NewRequest("GET", "/x", nil)
+ rec := httptest.NewRecorder()
+ app.ServeHTTP(rec, req)
+ Expect(actualURL).To(Equal("http://example.com/"))
+ })
+ })
+
+ Context("parseForwarded helper", func() {
+ It("parses unquoted proto and host", func() {
+ proto, host := parseForwarded("for=192.0.2.1;proto=https;host=h.example")
+ Expect(proto).To(Equal("https"))
+ Expect(host).To(Equal("h.example"))
+ })
+
+ It("strips quotes around values", func() {
+ proto, host := parseForwarded(`proto="https";host="h.example"`)
+ Expect(proto).To(Equal("https"))
+ Expect(host).To(Equal("h.example"))
+ })
+
+ It("uses only the first element of a multi-element header", func() {
+ proto, host := parseForwarded("proto=https;host=first.example, proto=http;host=second.example")
+ Expect(proto).To(Equal("https"))
+ Expect(host).To(Equal("first.example"))
+ })
+
+ It("returns empty strings for an empty header", func() {
+ proto, host := parseForwarded("")
+ Expect(proto).To(BeEmpty())
+ Expect(host).To(BeEmpty())
+ })
+
+ It("skips directives without a value", func() {
+ proto, host := parseForwarded("proto;host=h.example")
+ Expect(proto).To(BeEmpty())
+ Expect(host).To(Equal("h.example"))
+ })
+ })
+
+ Context("firstToken helper", func() {
+ It("returns the whole trimmed string when there is no comma", func() {
+ Expect(firstToken(" https ")).To(Equal("https"))
+ })
+
+ It("returns the first trimmed token when there is a comma", func() {
+ Expect(firstToken("https , http")).To(Equal("https"))
+ })
+ })
})
diff --git a/core/http/react-ui/public/locales/en/home.json b/core/http/react-ui/public/locales/en/home.json
index 142767999..35533a5a8 100644
--- a/core/http/react-ui/public/locales/en/home.json
+++ b/core/http/react-ui/public/locales/en/home.json
@@ -82,6 +82,7 @@
"tier": {
"cpu": "CPU-only",
"gpu-small": "GPU",
+ "gpu-mid": "GPU",
"gpu-large": "GPU"
},
"cpuNote": "No GPU detected — these small models stay responsive on CPU.",
diff --git a/core/http/react-ui/public/locales/en/models.json b/core/http/react-ui/public/locales/en/models.json
index 2bf7b018d..bd23d389e 100644
--- a/core/http/react-ui/public/locales/en/models.json
+++ b/core/http/react-ui/public/locales/en/models.json
@@ -2,6 +2,16 @@
"title": "Install Models",
"subtitle": "Browse and install AI models from the gallery",
"models": "Models",
+ "recommended": {
+ "title": "Recommended for your hardware",
+ "cpuNote": "No GPU detected - small models that stay responsive on CPU.",
+ "gpuNote": "Sized to fit your available VRAM with room for context.",
+ "install": "Install",
+ "installing": "Installing",
+ "installStarted": "Installing {{model}}…",
+ "installFailed": "Install failed: {{message}}",
+ "dismiss": "Dismiss recommendations"
+ },
"stats": {
"available": "Available",
"installed": "Installed"
diff --git a/core/http/react-ui/src/App.css b/core/http/react-ui/src/App.css
index 40eddc2e9..4578a3dd8 100644
--- a/core/http/react-ui/src/App.css
+++ b/core/http/react-ui/src/App.css
@@ -6409,6 +6409,9 @@ select.input {
font-size: 0.875rem;
word-break: break-all;
}
+.home-starters-badge {
+ font-size: 0.625rem;
+}
.home-starters-size {
margin-left: auto;
font-size: 0.75rem;
@@ -6416,6 +6419,74 @@ select.input {
white-space: nowrap;
}
+/* ──────────────────── Models gallery: recommended-for-your-hardware strip ──────────────────── */
+
+.rec-models {
+ margin-bottom: var(--spacing-md);
+ padding: var(--spacing-md) var(--spacing-lg);
+}
+.rec-models-head {
+ display: flex;
+ align-items: flex-start;
+ justify-content: space-between;
+ gap: var(--spacing-md);
+}
+.rec-models-title {
+ display: flex;
+ align-items: center;
+ gap: var(--spacing-sm);
+ flex-wrap: wrap;
+}
+.rec-models-title i {
+ color: var(--color-primary);
+}
+.rec-models-note {
+ font-size: 0.8125rem;
+ color: var(--color-text-secondary);
+}
+.rec-models-dismiss {
+ background: none;
+ border: none;
+ color: var(--color-text-muted);
+ cursor: pointer;
+ padding: 4px;
+ flex-shrink: 0;
+}
+.rec-models-dismiss:hover {
+ color: var(--color-text-primary);
+}
+.rec-models-grid {
+ display: grid;
+ grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));
+ gap: var(--spacing-sm);
+ margin-top: var(--spacing-md);
+}
+.rec-models-item {
+ display: flex;
+ flex-direction: column;
+ gap: var(--spacing-xs);
+ padding: var(--spacing-sm) var(--spacing-md);
+ border: 1px solid var(--color-border-subtle);
+ border-radius: var(--radius-md);
+ background: var(--color-bg-primary);
+}
+.rec-models-item-name {
+ font-weight: 500;
+ font-size: 0.8125rem;
+ word-break: break-all;
+}
+.rec-models-item-meta {
+ display: flex;
+ gap: var(--spacing-sm);
+ font-size: 0.75rem;
+ color: var(--color-text-muted);
+}
+.rec-models-item-fit {
+ display: inline-flex;
+ align-items: center;
+ gap: 4px;
+}
+
/* ──────────────────── Home: drop-in endpoint / API compatibility ──────────────────── */
.home-connect {
diff --git a/core/http/react-ui/src/components/RecommendedModels.jsx b/core/http/react-ui/src/components/RecommendedModels.jsx
new file mode 100644
index 000000000..7620406c8
--- /dev/null
+++ b/core/http/react-ui/src/components/RecommendedModels.jsx
@@ -0,0 +1,86 @@
+import { useState } from 'react'
+import { useTranslation } from 'react-i18next'
+import { modelsApi } from '../utils/api'
+import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels'
+
+const DISMISS_KEY = 'localai_rec_models_dismissed'
+
+// "Recommended for your hardware" strip at the top of the Models gallery. Shares
+// the hardware-fit ranking with the empty-state starter widget via
+// useRecommendedModels, but styled for the gallery page and dismissible (the
+// gallery is a repeat-visit surface, so it shouldn't nag).
+export default function RecommendedModels({ addToast }) {
+ const { t } = useTranslation('models')
+ const { recommended, tier, loading } = useRecommendedModels({ count: 4 })
+ const [installing, setInstalling] = useState(() => new Set())
+ const [dismissed, setDismissed] = useState(() => {
+ try { return localStorage.getItem(DISMISS_KEY) === '1' } catch { return false }
+ })
+
+ if (loading || dismissed) return null
+ if (!recommended || recommended.length === 0) return null
+
+ const dismiss = () => {
+ try { localStorage.setItem(DISMISS_KEY, '1') } catch { /* ignore */ }
+ setDismissed(true)
+ }
+
+ const install = async (name) => {
+ setInstalling(prev => new Set(prev).add(name))
+ try {
+ await modelsApi.install(name)
+ addToast?.(t('recommended.installStarted', { model: name }), 'success')
+ } catch (err) {
+ addToast?.(t('recommended.installFailed', { message: err.message }), 'error')
+ setInstalling(prev => {
+ const next = new Set(prev)
+ next.delete(name)
+ return next
+ })
+ }
+ }
+
+ const isGpu = tier.id !== 'cpu'
+
+ return (
+
+
+
+
+ {t('recommended.title')}
+ {isGpu ? t('recommended.gpuNote') : t('recommended.cpuNote')}
+
+
+
+
+ {recommended.map(m => {
+ const busy = installing.has(m.name)
+ return (
+
+
{m.name}
+
+ {isNvfp4Name(m.name) && NVFP4}
+ {m.sizeDisplay && {m.sizeDisplay}}
+ {isGpu && m.vramDisplay && (
+ {m.vramDisplay}
+ )}
+
+
+
+ )
+ })}
+
+
+ )
+}
diff --git a/core/http/react-ui/src/components/StarterModels.jsx b/core/http/react-ui/src/components/StarterModels.jsx
index 9273ae147..d5f8122b6 100644
--- a/core/http/react-ui/src/components/StarterModels.jsx
+++ b/core/http/react-ui/src/components/StarterModels.jsx
@@ -1,79 +1,78 @@
-import { useState, useEffect, useMemo } from 'react'
+import { useState } from 'react'
import { useTranslation } from 'react-i18next'
import { modelsApi } from '../utils/api'
-import { useResources } from '../hooks/useResources'
+import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels'
-// Curated, hardware-tiered starter models for the empty-state onboarding. Names
-// are real gallery entries (gallery/index.yaml); we intersect them against the
-// live gallery at render time so a custom/trimmed gallery degrades gracefully
-// (unmatched entries simply don't render).
-//
-// The guiding rule the maintainer asked for: CPU-only machines should be
-// steered to genuinely small models (1-4B, Q4) that stay responsive without a
-// GPU. GPU tiers scale the suggestion up with available VRAM.
-const SMALL = [
- { name: 'llama-3.2-1b-instruct:q4_k_m', size: '~0.8 GB' },
- { name: 'llama-3.2-3b-instruct:q4_k_m', size: '~2 GB' },
- { name: 'qwen3-1.7b', size: '~1.4 GB' },
- { name: 'gemma-3-1b-it', size: '~0.8 GB' },
-]
-const MID = [
- { name: 'qwen3-4b', size: '~2.5 GB' },
- { name: 'gemma-3-4b-it', size: '~3 GB' },
- { name: 'llama-3.2-3b-instruct:q4_k_m', size: '~2 GB' },
-]
-const LARGE = [
- { name: 'meta-llama-3.1-8b-instruct', size: '~5 GB' },
- { name: 'qwen3-4b', size: '~2.5 GB' },
- { name: 'mistral-7b-instruct-v0.3', size: '~4 GB' },
-]
+// Static fallback used only when the live gallery / estimates can't be reached
+// (offline, trimmed gallery). The hook is the primary, data-driven path; these
+// are real gallery names kept as a safety net so onboarding never shows nothing.
+// Gemma picks use the QAT (quantization-aware-trained) Q4 builds. NVIDIA boxes
+// get NVFP4 + MTP variants at the mid/large tiers (see NVIDIA below).
+const BASE = {
+ cpu: [
+ { name: 'gemma-4-e2b-it-qat-q4_0', size: '~1.5 GB' },
+ { name: 'qwen3.5-4b-claude-4.6-opus-reasoning-distilled', size: '~2.5 GB' },
+ { name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' },
+ { name: 'lfm2.5-1.2b-instruct', size: '~0.8 GB' },
+ ],
+ 'gpu-small': [
+ { name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' },
+ { name: 'lfm2.5-8b-a1b', size: '~5 GB' },
+ { name: 'qwen3.5-9b', size: '~5.5 GB' },
+ { name: 'gemma-4-12b-it-qat-q4_0', size: '~7 GB' },
+ ],
+ 'gpu-mid': [
+ { name: 'qwen3.6-27b', size: '~16 GB' },
+ { name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' },
+ { name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' },
+ { name: 'qwen3.5-27b', size: '~16 GB' },
+ ],
+ 'gpu-large': [
+ { name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' },
+ { name: 'qwen3.6-35b-a3b-claude-4.6-opus-reasoning-distilled', size: '~20 GB' },
+ { name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' },
+ { name: 'qwen3.5-35b-a3b-apex', size: '~20 GB' },
+ ],
+}
-const GB = 1024 * 1024 * 1024
+// NVIDIA-only overrides: NVFP4 is a Blackwell-optimised 4-bit format paired with
+// MTP (multi-token prediction) for speed. Only the mid/large tiers have these.
+const NVIDIA = {
+ 'gpu-mid': [
+ { name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' },
+ { name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' },
+ { name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' },
+ { name: 'qwen3.6-27b', size: '~16 GB' },
+ ],
+ 'gpu-large': [
+ { name: 'qwen3.6-35b-a3b-nvfp4-mtp', size: '~18 GB' },
+ { name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' },
+ { name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' },
+ { name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' },
+ ],
+}
-// Pick a tier from detected hardware. total_memory is GPU VRAM in bytes (0 when
-// CPU-only). Thresholds are deliberately conservative so a suggestion that
-// "fits" really does.
-function pickTier(resources) {
- const isGpu = resources?.type === 'gpu'
- const vram = resources?.aggregate?.total_memory || 0
- if (!isGpu || vram <= 0) return { id: 'cpu', list: SMALL }
- if (vram < 8 * GB) return { id: 'gpu-small', list: MID }
- return { id: 'gpu-large', list: LARGE }
+function fallbackFor(tierId, isNvidia) {
+ if (isNvidia && NVIDIA[tierId]) return NVIDIA[tierId]
+ return BASE[tierId] || BASE.cpu
}
export default function StarterModels({ addToast, onInstallStarted }) {
const { t } = useTranslation('home')
- const { resources } = useResources()
- const [available, setAvailable] = useState(null) // Set of gallery names, or null while loading
+ const { recommended, tier, isNvidia, loading } = useRecommendedModels({ count: 4 })
const [installing, setInstalling] = useState(() => new Set())
- const tier = useMemo(() => pickTier(resources), [resources])
- const candidates = tier.list
+ // While the hardware probe + gallery query are in flight, render nothing
+ // rather than flashing fallback content that may be replaced a moment later.
+ if (loading) return null
- // Verify candidates exist in the live gallery. One search per name (the tier
- // has at most a handful) keeps this resilient to gallery customization.
- useEffect(() => {
- let cancelled = false
- const names = [...new Set(candidates.map(c => c.name))]
- Promise.all(names.map(name =>
- modelsApi.list({ search: name, page: 1 })
- .then(data => (data?.models || []).some(m => (m.name || m.id) === name) ? name : null)
- .catch(() => null)
- )).then(found => {
- if (cancelled) return
- const hits = found.filter(Boolean)
- // If verification yielded nothing (e.g. gallery unreachable), fall back to
- // showing the curated list rather than an empty widget.
- setAvailable(hits.length > 0 ? new Set(hits) : null)
- })
- return () => { cancelled = true }
- }, [candidates])
+ // Prefer live recommendations; fall back to the static list only when the
+ // gallery yielded nothing.
+ const items = (recommended && recommended.length > 0)
+ ? recommended.map(r => ({ name: r.name, size: r.sizeDisplay }))
+ : fallbackFor(tier.id, isNvidia)
- const visible = available === null
- ? candidates
- : candidates.filter(c => available.has(c.name))
-
- if (visible.length === 0) return null
+ if (items.length === 0) return null
const install = async (name) => {
setInstalling(prev => new Set(prev).add(name))
@@ -104,12 +103,13 @@ export default function StarterModels({ addToast, onInstallStarted }) {
{tier.id === 'cpu' ? t('starters.cpuNote') : t('starters.gpuNote')}
- {visible.map(c => {
+ {items.map(c => {
const busy = installing.has(c.name)
return (
-
{c.name}
- {c.size}
+ {isNvfp4Name(c.name) && NVFP4}
+ {c.size && {c.size}}