mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-25 09:09:07 -04:00
Compare commits
17 Commits
feat/llama
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
693e3eec05 | ||
|
|
f1e5071321 | ||
|
|
93d6255de3 | ||
|
|
fe4f425fb5 | ||
|
|
fae9f6356f | ||
|
|
066abf82c0 | ||
|
|
a7fec9a49d | ||
|
|
c678530cf0 | ||
|
|
3c63431e46 | ||
|
|
3f647a2764 | ||
|
|
f88981cdce | ||
|
|
0d6de15ae9 | ||
|
|
5c3d48ab50 | ||
|
|
764b0352b9 | ||
|
|
75ba2daba1 | ||
|
|
62b14fd635 | ||
|
|
193d0e6aef |
10
.github/backend-matrix.yml
vendored
10
.github/backend-matrix.yml
vendored
@@ -4974,6 +4974,12 @@ includeDarwin:
|
|||||||
- backend: "kitten-tts"
|
- backend: "kitten-tts"
|
||||||
tag-suffix: "-metal-darwin-arm64-kitten-tts"
|
tag-suffix: "-metal-darwin-arm64-kitten-tts"
|
||||||
build-type: "mps"
|
build-type: "mps"
|
||||||
|
- backend: "trl"
|
||||||
|
tag-suffix: "-metal-darwin-arm64-trl"
|
||||||
|
build-type: "mps"
|
||||||
|
- backend: "liquid-audio"
|
||||||
|
tag-suffix: "-metal-darwin-arm64-liquid-audio"
|
||||||
|
build-type: "mps"
|
||||||
- backend: "piper"
|
- backend: "piper"
|
||||||
tag-suffix: "-metal-darwin-arm64-piper"
|
tag-suffix: "-metal-darwin-arm64-piper"
|
||||||
build-type: "metal"
|
build-type: "metal"
|
||||||
@@ -4990,6 +4996,10 @@ includeDarwin:
|
|||||||
tag-suffix: "-metal-darwin-arm64-sherpa-onnx"
|
tag-suffix: "-metal-darwin-arm64-sherpa-onnx"
|
||||||
build-type: "metal"
|
build-type: "metal"
|
||||||
lang: "go"
|
lang: "go"
|
||||||
|
- backend: "supertonic"
|
||||||
|
tag-suffix: "-metal-darwin-arm64-supertonic"
|
||||||
|
build-type: "metal"
|
||||||
|
lang: "go"
|
||||||
- backend: "local-store"
|
- backend: "local-store"
|
||||||
tag-suffix: "-metal-darwin-arm64-local-store"
|
tag-suffix: "-metal-darwin-arm64-local-store"
|
||||||
build-type: "metal"
|
build-type: "metal"
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
IK_LLAMA_VERSION?=7ccf1d209588962b96eacca325b37e9b3e8faf5e
|
IK_LLAMA_VERSION?=d5507e33ae7ee2b7b41475f08044d3bde3b839ee
|
||||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
LLAMA_VERSION?=be4a6a63eb2b848e19c277bdcf2bd399e8af76d9
|
LLAMA_VERSION?=8be759e6f70d629638a7eb70db3824cbdcea370b
|
||||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
|
|||||||
@@ -37,6 +37,7 @@
|
|||||||
#include "backend.pb.h"
|
#include "backend.pb.h"
|
||||||
#include "backend.grpc.pb.h"
|
#include "backend.grpc.pb.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
#include "arg.h"
|
||||||
#include "chat-auto-parser.h"
|
#include "chat-auto-parser.h"
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
||||||
@@ -592,6 +593,10 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
|||||||
params.checkpoint_min_step = 256;
|
params.checkpoint_min_step = 256;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Raw upstream llama-server flags collected from any option entry that
|
||||||
|
// starts with '-'. Applied once after the loop via common_params_parse.
|
||||||
|
std::vector<std::string> extra_argv;
|
||||||
|
|
||||||
// decode options. Options are in form optname:optvale, or if booleans only optname.
|
// decode options. Options are in form optname:optvale, or if booleans only optname.
|
||||||
for (int i = 0; i < request->options_size(); i++) {
|
for (int i = 0; i < request->options_size(); i++) {
|
||||||
std::string opt = request->options(i);
|
std::string opt = request->options(i);
|
||||||
@@ -1080,6 +1085,31 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
|||||||
} catch (...) {}
|
} catch (...) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- main model MoE on CPU (upstream --cpu-moe / --n-cpu-moe) ---
|
||||||
|
} else if (!strcmp(optname, "cpu_moe")) {
|
||||||
|
// Bool-style flag: keep all MoE expert weights on CPU.
|
||||||
|
const bool enable = (optval == NULL) ||
|
||||||
|
optval_str == "true" || optval_str == "1" || optval_str == "yes" ||
|
||||||
|
optval_str == "on" || optval_str == "enabled";
|
||||||
|
if (enable) {
|
||||||
|
params.tensor_buft_overrides.push_back(llm_ffn_exps_cpu_override());
|
||||||
|
}
|
||||||
|
} else if (!strcmp(optname, "n_cpu_moe")) {
|
||||||
|
if (optval != NULL) {
|
||||||
|
try {
|
||||||
|
int n = std::stoi(optval_str);
|
||||||
|
if (n < 0) n = 0;
|
||||||
|
// Keep override-name storage alive for the lifetime of the
|
||||||
|
// params struct (mirrors upstream arg.cpp's function-local static).
|
||||||
|
static std::list<std::string> buft_overrides_main;
|
||||||
|
for (int i = 0; i < n; ++i) {
|
||||||
|
buft_overrides_main.push_back(llm_ffn_exps_block_regex(i));
|
||||||
|
params.tensor_buft_overrides.push_back(
|
||||||
|
{buft_overrides_main.back().c_str(), ggml_backend_cpu_buffer_type()});
|
||||||
|
}
|
||||||
|
} catch (...) {}
|
||||||
|
}
|
||||||
|
|
||||||
// --- draft model tensor buffer overrides (upstream --spec-draft-override-tensor) ---
|
// --- draft model tensor buffer overrides (upstream --spec-draft-override-tensor) ---
|
||||||
} else if (!strcmp(optname, "draft_override_tensor") || !strcmp(optname, "spec_draft_override_tensor")) {
|
} else if (!strcmp(optname, "draft_override_tensor") || !strcmp(optname, "spec_draft_override_tensor")) {
|
||||||
// Format: <tensor regex>=<buffer type>,<tensor regex>=<buffer type>,...
|
// Format: <tensor regex>=<buffer type>,<tensor regex>=<buffer type>,...
|
||||||
@@ -1111,6 +1141,30 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
|||||||
else { cur.push_back(c); }
|
else { cur.push_back(c); }
|
||||||
}
|
}
|
||||||
if (!cur.empty()) flush(cur);
|
if (!cur.empty()) flush(cur);
|
||||||
|
|
||||||
|
// --- generic passthrough: any entry starting with '-' is a raw
|
||||||
|
// upstream llama-server flag, forwarded verbatim to the parser. ---
|
||||||
|
} else if (optname[0] == '-') {
|
||||||
|
std::string flag = optname;
|
||||||
|
// These flags make upstream's parser exit() (printing usage /
|
||||||
|
// completion), which would kill the backend process. Skip them.
|
||||||
|
if (flag == "-h" || flag == "--help" || flag == "--usage" ||
|
||||||
|
flag == "--version" || flag == "--license" ||
|
||||||
|
flag == "--list-devices" || flag == "-cl" ||
|
||||||
|
flag == "--cache-list" ||
|
||||||
|
flag.rfind("--completion", 0) == 0) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"[llama-cpp] ignoring passthrough flag that would exit: %s\n",
|
||||||
|
flag.c_str());
|
||||||
|
} else {
|
||||||
|
extra_argv.push_back(flag);
|
||||||
|
// Preserve the whole value after the first ':' so embedded
|
||||||
|
// colons (e.g. host:port) survive strtok's truncation of optval.
|
||||||
|
auto colon = opt.find(':');
|
||||||
|
if (colon != std::string::npos) {
|
||||||
|
extra_argv.push_back(opt.substr(colon + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1146,27 +1200,6 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!params.kv_overrides.empty()) {
|
|
||||||
params.kv_overrides.emplace_back();
|
|
||||||
params.kv_overrides.back().key[0] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// tensor_buft_overrides sentinel termination (mirrors upstream common/arg.cpp).
|
|
||||||
// Real entries are pushed during option parsing; here we pad/terminate so the
|
|
||||||
// model loader sees back().pattern == nullptr (GGML_ASSERT at common.cpp:1543)
|
|
||||||
// and so llama_params_fit has the placeholder slots it requires.
|
|
||||||
{
|
|
||||||
const size_t ntbo = llama_max_tensor_buft_overrides();
|
|
||||||
while (params.tensor_buft_overrides.size() < ntbo) {
|
|
||||||
params.tensor_buft_overrides.push_back({nullptr, nullptr});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Terminate the draft tensor_buft_overrides list with a sentinel, mirroring
|
|
||||||
// the main-model handling above.
|
|
||||||
if (!params.speculative.draft.tensor_buft_overrides.empty()) {
|
|
||||||
params.speculative.draft.tensor_buft_overrides.push_back({nullptr, nullptr});
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Add yarn
|
// TODO: Add yarn
|
||||||
|
|
||||||
if (!request->tensorsplit().empty()) {
|
if (!request->tensorsplit().empty()) {
|
||||||
@@ -1259,6 +1292,69 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
|||||||
params.sampling.grammar_triggers.push_back(std::move(trigger));
|
params.sampling.grammar_triggers.push_back(std::move(trigger));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Apply any raw upstream flags last so an explicit passthrough flag wins
|
||||||
|
// over the LocalAI-resolved field it maps to (e.g. --ctx-size beats
|
||||||
|
// context_size). This is the same parser llama-server itself uses.
|
||||||
|
if (!extra_argv.empty()) {
|
||||||
|
// common_params_parser_init resets a few fields for the SERVER example
|
||||||
|
// (n_parallel -> -1, use_color). Snapshot n_parallel so an unrelated
|
||||||
|
// passthrough flag can't silently clobber LocalAI's resolved value.
|
||||||
|
const int saved_n_parallel = params.n_parallel;
|
||||||
|
|
||||||
|
std::vector<char *> argv;
|
||||||
|
std::string prog = "llama-server";
|
||||||
|
argv.push_back(prog.data());
|
||||||
|
for (auto & a : extra_argv) {
|
||||||
|
argv.push_back(a.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
// ctx_arg.params is a reference, so this overlays the given flags onto
|
||||||
|
// `params` in place. Returns false on a recoverable parse error (and
|
||||||
|
// self-restores params); may exit() on a hard error, exactly as
|
||||||
|
// passing the same bad flag to llama-server would.
|
||||||
|
if (!common_params_parse((int)argv.size(), argv.data(), params,
|
||||||
|
LLAMA_EXAMPLE_SERVER)) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"[llama-cpp] failed to parse passthrough options; ignoring them\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Restore n_parallel unless a passthrough flag explicitly set it
|
||||||
|
// (parser_init's reset sentinel for SERVER is -1).
|
||||||
|
if (params.n_parallel == -1) {
|
||||||
|
params.n_parallel = saved_n_parallel;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Terminate/pad the override vectors only after BOTH the named-option loop
|
||||||
|
// and the generic passthrough (common_params_parse above) have pushed their
|
||||||
|
// real entries, so back() is the null sentinel the model loader asserts on.
|
||||||
|
// Running these before the passthrough let a passthrough flag (--cpu-moe,
|
||||||
|
// --override-tensor, --override-kv, ...) append a real entry after the
|
||||||
|
// sentinel: a GGML_ASSERT crash for tensor_buft_overrides, a silent drop for
|
||||||
|
// kv_overrides. Double-termination is harmless (the while is a no-op if the
|
||||||
|
// passthrough parse already padded; an extra trailing null is ignored).
|
||||||
|
|
||||||
|
if (!params.kv_overrides.empty()) {
|
||||||
|
params.kv_overrides.emplace_back();
|
||||||
|
params.kv_overrides.back().key[0] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// tensor_buft_overrides sentinel termination (mirrors upstream common/arg.cpp).
|
||||||
|
// Real entries are pushed during option parsing; here we pad/terminate so the
|
||||||
|
// model loader sees back().pattern == nullptr (GGML_ASSERT at common.cpp:1543)
|
||||||
|
// and so llama_params_fit has the placeholder slots it requires.
|
||||||
|
{
|
||||||
|
const size_t ntbo = llama_max_tensor_buft_overrides();
|
||||||
|
while (params.tensor_buft_overrides.size() < ntbo) {
|
||||||
|
params.tensor_buft_overrides.push_back({nullptr, nullptr});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Terminate the draft tensor_buft_overrides list with a sentinel, mirroring
|
||||||
|
// the main-model handling above.
|
||||||
|
if (!params.speculative.draft.tensor_buft_overrides.empty()) {
|
||||||
|
params.speculative.draft.tensor_buft_overrides.push_back({nullptr, nullptr});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -117,7 +117,8 @@ libgoacestepcpp-custom: CMakeLists.txt cpp/goacestepcpp.cpp cpp/goacestepcpp.h
|
|||||||
cmake .. $(CMAKE_ARGS) && \
|
cmake .. $(CMAKE_ARGS) && \
|
||||||
cmake --build . --config Release -j$(JOBS) --target goacestepcpp && \
|
cmake --build . --config Release -j$(JOBS) --target goacestepcpp && \
|
||||||
cd .. && \
|
cd .. && \
|
||||||
mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET)
|
(mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||||
|
mv build-$(SO_TARGET)/libgoacestepcpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||||
|
|
||||||
test: acestep-cpp
|
test: acestep-cpp
|
||||||
@echo "Running acestep-cpp tests..."
|
@echo "Running acestep-cpp tests..."
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -22,8 +23,12 @@ func main() {
|
|||||||
// Get library name from environment variable, default to fallback
|
// Get library name from environment variable, default to fallback
|
||||||
libName := os.Getenv("ACESTEP_LIBRARY")
|
libName := os.Getenv("ACESTEP_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "./libgoacestepcpp-fallback.dylib"
|
||||||
|
} else {
|
||||||
libName = "./libgoacestepcpp-fallback.so"
|
libName = "./libgoacestepcpp-fallback.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ mkdir -p $CURDIR/package/lib
|
|||||||
|
|
||||||
cp -avf $CURDIR/acestep-cpp $CURDIR/package/
|
cp -avf $CURDIR/acestep-cpp $CURDIR/package/
|
||||||
cp -fv $CURDIR/libgoacestepcpp-*.so $CURDIR/package/
|
cp -fv $CURDIR/libgoacestepcpp-*.so $CURDIR/package/
|
||||||
|
cp -fv $CURDIR/libgoacestepcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
# Detect architecture and copy appropriate libraries
|
||||||
|
|||||||
@@ -12,9 +12,19 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
grep -e "flags" /proc/cpuinfo | head -1
|
grep -e "flags" /proc/cpuinfo | head -1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS: single library variant (Metal or Accelerate). The goacestepcpp
|
||||||
|
# target is built as a CMake MODULE, which emits a .dylib for a SHARED
|
||||||
|
# build but a .so for a MODULE build on Apple, so prefer .dylib and fall
|
||||||
|
# back to .so.
|
||||||
|
LIBRARY="$CURDIR/libgoacestepcpp-fallback.dylib"
|
||||||
|
if [ ! -e "$LIBRARY" ]; then
|
||||||
|
LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
|
||||||
|
fi
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
|
||||||
|
|
||||||
if [ "$(uname)" != "Darwin" ]; then
|
|
||||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||||
echo "CPU: AVX found OK"
|
echo "CPU: AVX found OK"
|
||||||
if [ -e $CURDIR/libgoacestepcpp-avx.so ]; then
|
if [ -e $CURDIR/libgoacestepcpp-avx.so ]; then
|
||||||
@@ -36,9 +46,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
LIBRARY="$CURDIR/libgoacestepcpp-avx512.so"
|
LIBRARY="$CURDIR/libgoacestepcpp-avx512.so"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
export ACESTEP_LIBRARY=$LIBRARY
|
export ACESTEP_LIBRARY=$LIBRARY
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
|
|||||||
@@ -57,6 +57,7 @@ libced.so: sources/ced.cpp
|
|||||||
cmake -B sources/ced.cpp/build-shared -S sources/ced.cpp $(CMAKE_ARGS)
|
cmake -B sources/ced.cpp/build-shared -S sources/ced.cpp $(CMAKE_ARGS)
|
||||||
cmake --build sources/ced.cpp/build-shared --config Release -j$(JOBS)
|
cmake --build sources/ced.cpp/build-shared --config Release -j$(JOBS)
|
||||||
cp -fv sources/ced.cpp/build-shared/libced.so* ./ 2>/dev/null || true
|
cp -fv sources/ced.cpp/build-shared/libced.so* ./ 2>/dev/null || true
|
||||||
|
cp -fv sources/ced.cpp/build-shared/libced.dylib ./ 2>/dev/null || true
|
||||||
cp -fv sources/ced.cpp/include/ced_capi.h ./
|
cp -fv sources/ced.cpp/include/ced_capi.h ./
|
||||||
|
|
||||||
ced-grpc: libced.so main.go goced.go
|
ced-grpc: libced.so main.go goced.go
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -27,8 +28,12 @@ type libFunc struct {
|
|||||||
func main() {
|
func main() {
|
||||||
libName := os.Getenv("CED_LIBRARY")
|
libName := os.Getenv("CED_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "libced.dylib"
|
||||||
|
} else {
|
||||||
libName = "libced.so"
|
libName = "libced.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Errorf("ced: dlopen %q: %w", libName, err))
|
panic(fmt.Errorf("ced: dlopen %q: %w", libName, err))
|
||||||
|
|||||||
@@ -15,10 +15,12 @@ mkdir -p "$CURDIR/package/lib"
|
|||||||
cp -avf "$CURDIR/ced-grpc" "$CURDIR/package/"
|
cp -avf "$CURDIR/ced-grpc" "$CURDIR/package/"
|
||||||
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
|
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
|
||||||
|
|
||||||
cp -avf "$CURDIR"/libced.so* "$CURDIR/package/lib/" 2>/dev/null || {
|
cp -avf "$CURDIR"/libced.so* "$CURDIR/package/lib/" 2>/dev/null || true
|
||||||
echo "ERROR: libced.so not found in $CURDIR, run 'make' first" >&2
|
cp -avf "$CURDIR"/libced.dylib "$CURDIR/package/lib/" 2>/dev/null || true
|
||||||
|
if ! ls "$CURDIR"/package/lib/libced.* >/dev/null 2>&1; then
|
||||||
|
echo "ERROR: libced shared library not found in $CURDIR, run 'make' first" >&2
|
||||||
exit 1
|
exit 1
|
||||||
}
|
fi
|
||||||
|
|
||||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
||||||
|
|||||||
@@ -3,7 +3,12 @@ set -e
|
|||||||
|
|
||||||
CURDIR=$(dirname "$(realpath "$0")")
|
CURDIR=$(dirname "$(realpath "$0")")
|
||||||
|
|
||||||
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
export DYLD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${DYLD_LIBRARY_PATH:-}"
|
||||||
|
export CED_LIBRARY="$CURDIR/lib/libced.dylib"
|
||||||
|
else
|
||||||
|
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
|
||||||
|
fi
|
||||||
|
|
||||||
# If a self-contained ld.so was packaged, route through it so the packaged
|
# If a self-contained ld.so was packaged, route through it so the packaged
|
||||||
# libc / libstdc++ are used instead of the host's (matches the sibling backends).
|
# libc / libstdc++ are used instead of the host's (matches the sibling backends).
|
||||||
|
|||||||
@@ -75,7 +75,8 @@ UNAME_S := $(shell uname -s)
|
|||||||
ifeq ($(UNAME_S),Linux)
|
ifeq ($(UNAME_S),Linux)
|
||||||
VARIANT_TARGETS = libgocrispasr-avx.so libgocrispasr-avx2.so libgocrispasr-avx512.so libgocrispasr-fallback.so
|
VARIANT_TARGETS = libgocrispasr-avx.so libgocrispasr-avx2.so libgocrispasr-avx512.so libgocrispasr-fallback.so
|
||||||
else
|
else
|
||||||
VARIANT_TARGETS = libgocrispasr-fallback.so
|
# On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
|
||||||
|
VARIANT_TARGETS = libgocrispasr-fallback.dylib
|
||||||
endif
|
endif
|
||||||
|
|
||||||
crispasr: main.go gocrispasr.go $(VARIANT_TARGETS)
|
crispasr: main.go gocrispasr.go $(VARIANT_TARGETS)
|
||||||
@@ -87,7 +88,7 @@ package: crispasr
|
|||||||
build: package
|
build: package
|
||||||
|
|
||||||
clean: purge
|
clean: purge
|
||||||
rm -rf libgocrispasr*.so package sources/CrispASR crispasr
|
rm -rf libgocrispasr*.so libgocrispasr*.dylib package sources/CrispASR crispasr
|
||||||
|
|
||||||
purge:
|
purge:
|
||||||
rm -rf build*
|
rm -rf build*
|
||||||
@@ -118,13 +119,21 @@ libgocrispasr-fallback.so: sources/CrispASR
|
|||||||
SO_TARGET=libgocrispasr-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom
|
SO_TARGET=libgocrispasr-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom
|
||||||
rm -rfv build*
|
rm -rfv build*
|
||||||
|
|
||||||
|
# Build fallback variant as a dylib (Darwin)
|
||||||
|
libgocrispasr-fallback.dylib: sources/CrispASR
|
||||||
|
$(MAKE) purge
|
||||||
|
$(info ${GREEN}I crispasr build info:fallback (dylib)${RESET})
|
||||||
|
SO_TARGET=libgocrispasr-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom
|
||||||
|
rm -rfv build*
|
||||||
|
|
||||||
libgocrispasr-custom: CMakeLists.txt cpp/crispasr_shim.cpp cpp/crispasr_shim.h
|
libgocrispasr-custom: CMakeLists.txt cpp/crispasr_shim.cpp cpp/crispasr_shim.h
|
||||||
mkdir -p build-$(SO_TARGET) && \
|
mkdir -p build-$(SO_TARGET) && \
|
||||||
cd build-$(SO_TARGET) && \
|
cd build-$(SO_TARGET) && \
|
||||||
cmake .. $(CMAKE_ARGS) && \
|
cmake .. $(CMAKE_ARGS) && \
|
||||||
cmake --build . --config Release -j$(JOBS) && \
|
cmake --build . --config Release -j$(JOBS) && \
|
||||||
cd .. && \
|
cd .. && \
|
||||||
mv build-$(SO_TARGET)/libgocrispasr.so ./$(SO_TARGET)
|
(mv build-$(SO_TARGET)/libgocrispasr.so ./$(SO_TARGET) 2>/dev/null || \
|
||||||
|
mv build-$(SO_TARGET)/libgocrispasr.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||||
|
|
||||||
test: crispasr
|
test: crispasr
|
||||||
CGO_ENABLED=0 $(GOCMD) test -v ./...
|
CGO_ENABLED=0 $(GOCMD) test -v ./...
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -21,8 +22,12 @@ type LibFuncs struct {
|
|||||||
func main() {
|
func main() {
|
||||||
libName := os.Getenv("CRISPASR_LIBRARY")
|
libName := os.Getenv("CRISPASR_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "./libgocrispasr-fallback.dylib"
|
||||||
|
} else {
|
||||||
libName = "./libgocrispasr-fallback.so"
|
libName = "./libgocrispasr-fallback.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
|||||||
mkdir -p $CURDIR/package/lib
|
mkdir -p $CURDIR/package/lib
|
||||||
|
|
||||||
cp -avf $CURDIR/crispasr $CURDIR/package/
|
cp -avf $CURDIR/crispasr $CURDIR/package/
|
||||||
cp -fv $CURDIR/libgocrispasr-*.so $CURDIR/package/
|
cp -fv $CURDIR/libgocrispasr-*.so $CURDIR/package/ 2>/dev/null || true
|
||||||
|
cp -fv $CURDIR/libgocrispasr-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
# Detect architecture and copy appropriate libraries
|
||||||
|
|||||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
grep -e "flags" /proc/cpuinfo | head -1
|
grep -e "flags" /proc/cpuinfo | head -1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
LIBRARY="$CURDIR/libgocrispasr-fallback.so"
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS: single dylib variant (Metal or Accelerate)
|
||||||
|
LIBRARY="$CURDIR/libgocrispasr-fallback.dylib"
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
LIBRARY="$CURDIR/libgocrispasr-fallback.so"
|
||||||
|
|
||||||
if [ "$(uname)" != "Darwin" ]; then
|
|
||||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||||
echo "CPU: AVX found OK"
|
echo "CPU: AVX found OK"
|
||||||
if [ -e $CURDIR/libgocrispasr-avx.so ]; then
|
if [ -e $CURDIR/libgocrispasr-avx.so ]; then
|
||||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
LIBRARY="$CURDIR/libgocrispasr-avx512.so"
|
LIBRARY="$CURDIR/libgocrispasr-avx512.so"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
export CRISPASR_LIBRARY=$LIBRARY
|
export CRISPASR_LIBRARY=$LIBRARY
|
||||||
|
|
||||||
# Point piper's espeak-ng phonemizer at the bundled voice data. The variable
|
# Point piper's espeak-ng phonemizer at the bundled voice data. The variable
|
||||||
|
|||||||
@@ -77,7 +77,7 @@ ifeq ($(UNAME_S),Linux)
|
|||||||
VARIANT_TARGETS = libdepthanythingcpp-avx.so libdepthanythingcpp-avx2.so libdepthanythingcpp-avx512.so libdepthanythingcpp-fallback.so
|
VARIANT_TARGETS = libdepthanythingcpp-avx.so libdepthanythingcpp-avx2.so libdepthanythingcpp-avx512.so libdepthanythingcpp-fallback.so
|
||||||
else
|
else
|
||||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||||
VARIANT_TARGETS = libdepthanythingcpp-fallback.so
|
VARIANT_TARGETS = libdepthanythingcpp-fallback.dylib
|
||||||
endif
|
endif
|
||||||
|
|
||||||
depth-anything-cpp: main.go godepthanythingcpp.go $(VARIANT_TARGETS)
|
depth-anything-cpp: main.go godepthanythingcpp.go $(VARIANT_TARGETS)
|
||||||
@@ -89,7 +89,7 @@ package: depth-anything-cpp
|
|||||||
build: package
|
build: package
|
||||||
|
|
||||||
clean: purge
|
clean: purge
|
||||||
rm -rf libdepthanythingcpp*.so depth-anything-cpp package sources
|
rm -rf libdepthanythingcpp*.so libdepthanythingcpp*.dylib depth-anything-cpp package sources
|
||||||
|
|
||||||
purge:
|
purge:
|
||||||
rm -rf build*
|
rm -rf build*
|
||||||
@@ -116,11 +116,19 @@ libdepthanythingcpp-avx512.so: sources/depth-anything.cpp
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
# Build fallback variant (all platforms)
|
# Build fallback variant (all platforms)
|
||||||
|
ifeq ($(UNAME_S),Darwin)
|
||||||
|
libdepthanythingcpp-fallback.dylib: sources/depth-anything.cpp
|
||||||
|
rm -rfv build-$@
|
||||||
|
$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
|
||||||
|
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
|
||||||
|
rm -rfv build-$@
|
||||||
|
else
|
||||||
libdepthanythingcpp-fallback.so: sources/depth-anything.cpp
|
libdepthanythingcpp-fallback.so: sources/depth-anything.cpp
|
||||||
rm -rfv build-$@
|
rm -rfv build-$@
|
||||||
$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
|
$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
|
||||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
|
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
|
||||||
rm -rfv build-$@
|
rm -rfv build-$@
|
||||||
|
endif
|
||||||
|
|
||||||
libdepthanythingcpp-custom: CMakeLists.txt
|
libdepthanythingcpp-custom: CMakeLists.txt
|
||||||
mkdir -p build-$(SO_TARGET) && \
|
mkdir -p build-$(SO_TARGET) && \
|
||||||
@@ -128,7 +136,8 @@ libdepthanythingcpp-custom: CMakeLists.txt
|
|||||||
cmake .. $(CMAKE_ARGS) && \
|
cmake .. $(CMAKE_ARGS) && \
|
||||||
cmake --build . --config Release -j$(JOBS) && \
|
cmake --build . --config Release -j$(JOBS) && \
|
||||||
cd .. && \
|
cd .. && \
|
||||||
mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET)
|
(mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET) 2>/dev/null || \
|
||||||
|
mv build-$(SO_TARGET)/libdepthanything.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||||
|
|
||||||
all: depth-anything-cpp package
|
all: depth-anything-cpp package
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -27,8 +28,12 @@ func main() {
|
|||||||
// Get library name from environment variable, default to fallback
|
// Get library name from environment variable, default to fallback
|
||||||
libName := os.Getenv("DEPTHANYTHING_LIBRARY")
|
libName := os.Getenv("DEPTHANYTHING_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "./libdepthanythingcpp-fallback.dylib"
|
||||||
|
} else {
|
||||||
libName = "./libdepthanythingcpp-fallback.so"
|
libName = "./libdepthanythingcpp-fallback.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
|||||||
# Create lib directory
|
# Create lib directory
|
||||||
mkdir -p $CURDIR/package/lib
|
mkdir -p $CURDIR/package/lib
|
||||||
|
|
||||||
cp -avf $CURDIR/libdepthanythingcpp-*.so $CURDIR/package/
|
cp -fv $CURDIR/libdepthanythingcpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||||
|
cp -fv $CURDIR/libdepthanythingcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||||
cp -avf $CURDIR/depth-anything-cpp $CURDIR/package/
|
cp -avf $CURDIR/depth-anything-cpp $CURDIR/package/
|
||||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||||
|
|
||||||
|
|||||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
grep -e "flags" /proc/cpuinfo | head -1
|
grep -e "flags" /proc/cpuinfo | head -1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS: single dylib variant (Metal or Accelerate)
|
||||||
|
LIBRARY="$CURDIR/libdepthanythingcpp-fallback.dylib"
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"
|
||||||
|
|
||||||
if [ "$(uname)" != "Darwin" ]; then
|
|
||||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||||
echo "CPU: AVX found OK"
|
echo "CPU: AVX found OK"
|
||||||
if [ -e $CURDIR/libdepthanythingcpp-avx.so ]; then
|
if [ -e $CURDIR/libdepthanythingcpp-avx.so ]; then
|
||||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
LIBRARY="$CURDIR/libdepthanythingcpp-avx512.so"
|
LIBRARY="$CURDIR/libdepthanythingcpp-avx512.so"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
export DEPTHANYTHING_LIBRARY=$LIBRARY
|
export DEPTHANYTHING_LIBRARY=$LIBRARY
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
|
|||||||
@@ -67,8 +67,9 @@ $(LIB_SENTINEL): sources/LocalVQE
|
|||||||
# that the loader picks at runtime. We must build every target — the
|
# that the loader picks at runtime. We must build every target — the
|
||||||
# default `--target localvqe_shared` drops these. CMAKE_LIBRARY_OUTPUT_DIRECTORY
|
# default `--target localvqe_shared` drops these. CMAKE_LIBRARY_OUTPUT_DIRECTORY
|
||||||
# routes all of them into build/bin; copy them out next to the binary.
|
# routes all of them into build/bin; copy them out next to the binary.
|
||||||
cp -P build/bin/liblocalvqe.so* . 2>/dev/null || cp -P build/liblocalvqe.so* .
|
cp -P build/bin/liblocalvqe.so* . 2>/dev/null || cp -P build/bin/liblocalvqe.dylib . 2>/dev/null || cp -P build/liblocalvqe.so* . 2>/dev/null || cp -P build/liblocalvqe.dylib .
|
||||||
cp -P build/bin/libggml*.so* . 2>/dev/null || true
|
cp -P build/bin/libggml*.so* . 2>/dev/null || true
|
||||||
|
cp -P build/bin/libggml*.dylib . 2>/dev/null || true
|
||||||
touch $(LIB_SENTINEL)
|
touch $(LIB_SENTINEL)
|
||||||
|
|
||||||
liblocalvqe.so: $(LIB_SENTINEL)
|
liblocalvqe.so: $(LIB_SENTINEL)
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -21,8 +22,12 @@ type LibFuncs struct {
|
|||||||
func main() {
|
func main() {
|
||||||
libName := os.Getenv("LOCALVQE_LIBRARY")
|
libName := os.Getenv("LOCALVQE_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "./liblocalvqe.dylib"
|
||||||
|
} else {
|
||||||
libName = "./liblocalvqe.so"
|
libName = "./liblocalvqe.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -15,7 +15,9 @@ cp -avf $CURDIR/localvqe $CURDIR/package/
|
|||||||
# liblocalvqe.so* (with SOVERSION symlinks) and the libggml-*.so runtime
|
# liblocalvqe.so* (with SOVERSION symlinks) and the libggml-*.so runtime
|
||||||
# variants — LocalVQE picks the matching CPU variant at load time.
|
# variants — LocalVQE picks the matching CPU variant at load time.
|
||||||
cp -P $CURDIR/liblocalvqe.so* $CURDIR/package/ 2>/dev/null || true
|
cp -P $CURDIR/liblocalvqe.so* $CURDIR/package/ 2>/dev/null || true
|
||||||
|
cp -P $CURDIR/liblocalvqe.dylib $CURDIR/package/ 2>/dev/null || true
|
||||||
cp -P $CURDIR/libggml*.so* $CURDIR/package/ 2>/dev/null || true
|
cp -P $CURDIR/libggml*.so* $CURDIR/package/ 2>/dev/null || true
|
||||||
|
cp -P $CURDIR/libggml*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
# Detect architecture and copy appropriate libraries
|
||||||
|
|||||||
@@ -10,8 +10,19 @@ CURDIR=$(dirname "$(realpath $0)")
|
|||||||
# exec'ing the binary.
|
# exec'ing the binary.
|
||||||
cd "$CURDIR"
|
cd "$CURDIR"
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$LD_LIBRARY_PATH
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
export LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
|
# macOS: LocalVQE is built as a SHARED library, so dyld needs the .dylib +
|
||||||
|
# DYLD_LIBRARY_PATH. Prefer .dylib and fall back to .so just in case.
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.dylib
|
||||||
|
if [ ! -e "$LOCALVQE_LIBRARY" ]; then
|
||||||
|
LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
|
||||||
|
fi
|
||||||
|
export LOCALVQE_LIBRARY
|
||||||
|
else
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
|
export LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
|
||||||
|
fi
|
||||||
|
|
||||||
if [ -f $CURDIR/lib/ld.so ]; then
|
if [ -f $CURDIR/lib/ld.so ]; then
|
||||||
echo "Using lib/ld.so"
|
echo "Using lib/ld.so"
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ ifeq ($(UNAME_S),Linux)
|
|||||||
VARIANT_TARGETS = liblocateanythingcpp-avx.so liblocateanythingcpp-avx2.so liblocateanythingcpp-avx512.so liblocateanythingcpp-fallback.so
|
VARIANT_TARGETS = liblocateanythingcpp-avx.so liblocateanythingcpp-avx2.so liblocateanythingcpp-avx512.so liblocateanythingcpp-fallback.so
|
||||||
else
|
else
|
||||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||||
VARIANT_TARGETS = liblocateanythingcpp-fallback.so
|
VARIANT_TARGETS = liblocateanythingcpp-fallback.dylib
|
||||||
endif
|
endif
|
||||||
|
|
||||||
locate-anything-cpp: main.go golocateanythingcpp.go $(VARIANT_TARGETS)
|
locate-anything-cpp: main.go golocateanythingcpp.go $(VARIANT_TARGETS)
|
||||||
@@ -82,7 +82,7 @@ package: locate-anything-cpp
|
|||||||
build: package
|
build: package
|
||||||
|
|
||||||
clean: purge
|
clean: purge
|
||||||
rm -rf liblocateanythingcpp*.so locate-anything-cpp package sources
|
rm -rf liblocateanythingcpp*.so liblocateanythingcpp*.dylib locate-anything-cpp package sources
|
||||||
|
|
||||||
purge:
|
purge:
|
||||||
rm -rf build*
|
rm -rf build*
|
||||||
@@ -109,11 +109,19 @@ liblocateanythingcpp-avx512.so: sources/locate-anything.cpp
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
# Build fallback variant (all platforms)
|
# Build fallback variant (all platforms)
|
||||||
|
ifeq ($(UNAME_S),Darwin)
|
||||||
|
liblocateanythingcpp-fallback.dylib: sources/locate-anything.cpp
|
||||||
|
rm -rfv build-$@
|
||||||
|
$(info ${GREEN}I locate-anything-cpp build info:fallback${RESET})
|
||||||
|
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom
|
||||||
|
rm -rfv build-$@
|
||||||
|
else
|
||||||
liblocateanythingcpp-fallback.so: sources/locate-anything.cpp
|
liblocateanythingcpp-fallback.so: sources/locate-anything.cpp
|
||||||
rm -rfv build-$@
|
rm -rfv build-$@
|
||||||
$(info ${GREEN}I locate-anything-cpp build info:fallback${RESET})
|
$(info ${GREEN}I locate-anything-cpp build info:fallback${RESET})
|
||||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom
|
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom
|
||||||
rm -rfv build-$@
|
rm -rfv build-$@
|
||||||
|
endif
|
||||||
|
|
||||||
liblocateanythingcpp-custom: CMakeLists.txt
|
liblocateanythingcpp-custom: CMakeLists.txt
|
||||||
mkdir -p build-$(SO_TARGET) && \
|
mkdir -p build-$(SO_TARGET) && \
|
||||||
@@ -121,7 +129,8 @@ liblocateanythingcpp-custom: CMakeLists.txt
|
|||||||
cmake .. $(CMAKE_ARGS) && \
|
cmake .. $(CMAKE_ARGS) && \
|
||||||
cmake --build . --config Release -j$(JOBS) && \
|
cmake --build . --config Release -j$(JOBS) && \
|
||||||
cd .. && \
|
cd .. && \
|
||||||
mv build-$(SO_TARGET)/liblocateanythingcpp.so ./$(SO_TARGET)
|
(mv build-$(SO_TARGET)/liblocateanythingcpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||||
|
mv build-$(SO_TARGET)/liblocateanythingcpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||||
|
|
||||||
all: locate-anything-cpp package
|
all: locate-anything-cpp package
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -27,8 +28,12 @@ func main() {
|
|||||||
// Get library name from environment variable, default to fallback
|
// Get library name from environment variable, default to fallback
|
||||||
libName := os.Getenv("LOCATEANYTHING_LIBRARY")
|
libName := os.Getenv("LOCATEANYTHING_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "./liblocateanythingcpp-fallback.dylib"
|
||||||
|
} else {
|
||||||
libName = "./liblocateanythingcpp-fallback.so"
|
libName = "./liblocateanythingcpp-fallback.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
|||||||
# Create lib directory
|
# Create lib directory
|
||||||
mkdir -p $CURDIR/package/lib
|
mkdir -p $CURDIR/package/lib
|
||||||
|
|
||||||
cp -avf $CURDIR/liblocateanythingcpp-*.so $CURDIR/package/
|
cp -fv $CURDIR/liblocateanythingcpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||||
|
cp -fv $CURDIR/liblocateanythingcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||||
cp -avf $CURDIR/locate-anything-cpp $CURDIR/package/
|
cp -avf $CURDIR/locate-anything-cpp $CURDIR/package/
|
||||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||||
|
|
||||||
|
|||||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
grep -e "flags" /proc/cpuinfo | head -1
|
grep -e "flags" /proc/cpuinfo | head -1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so"
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS: single dylib variant (Metal or Accelerate)
|
||||||
|
LIBRARY="$CURDIR/liblocateanythingcpp-fallback.dylib"
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so"
|
||||||
|
|
||||||
if [ "$(uname)" != "Darwin" ]; then
|
|
||||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||||
echo "CPU: AVX found OK"
|
echo "CPU: AVX found OK"
|
||||||
if [ -e $CURDIR/liblocateanythingcpp-avx.so ]; then
|
if [ -e $CURDIR/liblocateanythingcpp-avx.so ]; then
|
||||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
LIBRARY="$CURDIR/liblocateanythingcpp-avx512.so"
|
LIBRARY="$CURDIR/liblocateanythingcpp-avx512.so"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
export LOCATEANYTHING_LIBRARY=$LIBRARY
|
export LOCATEANYTHING_LIBRARY=$LIBRARY
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# omnivoice.cpp version
|
# omnivoice.cpp version
|
||||||
OMNIVOICE_REPO?=https://github.com/ServeurpersoCom/omnivoice.cpp
|
OMNIVOICE_REPO?=https://github.com/ServeurpersoCom/omnivoice.cpp
|
||||||
OMNIVOICE_VERSION?=96d30169afd5e6bb3fd6a0e9be0eb505bfe81fcd
|
OMNIVOICE_VERSION?=0f37401bebe9b20c0160a888e592108fc1d17607
|
||||||
SO_TARGET?=libgomnivoicecpp.so
|
SO_TARGET?=libgomnivoicecpp.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
@@ -65,7 +65,8 @@ UNAME_S := $(shell uname -s)
|
|||||||
ifeq ($(UNAME_S),Linux)
|
ifeq ($(UNAME_S),Linux)
|
||||||
VARIANT_TARGETS = libgomnivoicecpp-avx.so libgomnivoicecpp-avx2.so libgomnivoicecpp-avx512.so libgomnivoicecpp-fallback.so
|
VARIANT_TARGETS = libgomnivoicecpp-avx.so libgomnivoicecpp-avx2.so libgomnivoicecpp-avx512.so libgomnivoicecpp-fallback.so
|
||||||
else
|
else
|
||||||
VARIANT_TARGETS = libgomnivoicecpp-fallback.so
|
# On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
|
||||||
|
VARIANT_TARGETS = libgomnivoicecpp-fallback.dylib
|
||||||
endif
|
endif
|
||||||
|
|
||||||
omnivoice-cpp: main.go gomnivoicecpp.go $(VARIANT_TARGETS)
|
omnivoice-cpp: main.go gomnivoicecpp.go $(VARIANT_TARGETS)
|
||||||
@@ -77,7 +78,7 @@ package: omnivoice-cpp
|
|||||||
build: package
|
build: package
|
||||||
|
|
||||||
clean: purge
|
clean: purge
|
||||||
rm -rf libgomnivoicecpp*.so package sources/omnivoice.cpp omnivoice-cpp
|
rm -rf libgomnivoicecpp*.so libgomnivoicecpp*.dylib package sources/omnivoice.cpp omnivoice-cpp
|
||||||
|
|
||||||
purge:
|
purge:
|
||||||
rm -rf build*
|
rm -rf build*
|
||||||
@@ -106,13 +107,20 @@ libgomnivoicecpp-fallback.so: sources/omnivoice.cpp
|
|||||||
SO_TARGET=libgomnivoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom
|
SO_TARGET=libgomnivoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom
|
||||||
rm -rf build-libgomnivoicecpp-fallback.so
|
rm -rf build-libgomnivoicecpp-fallback.so
|
||||||
|
|
||||||
|
# Build fallback variant as a dylib (Darwin)
|
||||||
|
libgomnivoicecpp-fallback.dylib: sources/omnivoice.cpp
|
||||||
|
$(info ${GREEN}I omnivoice-cpp build info:fallback (dylib)${RESET})
|
||||||
|
SO_TARGET=libgomnivoicecpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom
|
||||||
|
rm -rf build-libgomnivoicecpp-fallback.dylib
|
||||||
|
|
||||||
libgomnivoicecpp-custom: CMakeLists.txt cpp/gomnivoicecpp.cpp cpp/gomnivoicecpp.h
|
libgomnivoicecpp-custom: CMakeLists.txt cpp/gomnivoicecpp.cpp cpp/gomnivoicecpp.h
|
||||||
mkdir -p build-$(SO_TARGET) && \
|
mkdir -p build-$(SO_TARGET) && \
|
||||||
cd build-$(SO_TARGET) && \
|
cd build-$(SO_TARGET) && \
|
||||||
cmake .. $(CMAKE_ARGS) && \
|
cmake .. $(CMAKE_ARGS) && \
|
||||||
cmake --build . --config Release -j$(JOBS) --target gomnivoicecpp && \
|
cmake --build . --config Release -j$(JOBS) --target gomnivoicecpp && \
|
||||||
cd .. && \
|
cd .. && \
|
||||||
mv build-$(SO_TARGET)/libgomnivoicecpp.so ./$(SO_TARGET)
|
(mv build-$(SO_TARGET)/libgomnivoicecpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||||
|
mv build-$(SO_TARGET)/libgomnivoicecpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||||
|
|
||||||
test: omnivoice-cpp
|
test: omnivoice-cpp
|
||||||
@echo "Running omnivoice-cpp tests..."
|
@echo "Running omnivoice-cpp tests..."
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -21,8 +22,12 @@ type LibFuncs struct {
|
|||||||
func main() {
|
func main() {
|
||||||
libName := os.Getenv("OMNIVOICE_LIBRARY")
|
libName := os.Getenv("OMNIVOICE_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "./libgomnivoicecpp-fallback.dylib"
|
||||||
|
} else {
|
||||||
libName = "./libgomnivoicecpp-fallback.so"
|
libName = "./libgomnivoicecpp-fallback.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
|||||||
mkdir -p $CURDIR/package/lib
|
mkdir -p $CURDIR/package/lib
|
||||||
|
|
||||||
cp -avf $CURDIR/omnivoice-cpp $CURDIR/package/
|
cp -avf $CURDIR/omnivoice-cpp $CURDIR/package/
|
||||||
cp -fv $CURDIR/libgomnivoicecpp-*.so $CURDIR/package/
|
cp -fv $CURDIR/libgomnivoicecpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||||
|
cp -fv $CURDIR/libgomnivoicecpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
# Detect architecture and copy appropriate libraries
|
||||||
|
|||||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
grep -e "flags" /proc/cpuinfo | head -1
|
grep -e "flags" /proc/cpuinfo | head -1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
LIBRARY="$CURDIR/libgomnivoicecpp-fallback.so"
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS: single dylib variant (Metal or Accelerate)
|
||||||
|
LIBRARY="$CURDIR/libgomnivoicecpp-fallback.dylib"
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
LIBRARY="$CURDIR/libgomnivoicecpp-fallback.so"
|
||||||
|
|
||||||
if [ "$(uname)" != "Darwin" ]; then
|
|
||||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||||
echo "CPU: AVX found OK"
|
echo "CPU: AVX found OK"
|
||||||
if [ -e $CURDIR/libgomnivoicecpp-avx.so ]; then
|
if [ -e $CURDIR/libgomnivoicecpp-avx.so ]; then
|
||||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
LIBRARY="$CURDIR/libgomnivoicecpp-avx512.so"
|
LIBRARY="$CURDIR/libgomnivoicecpp-avx512.so"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
export OMNIVOICE_LIBRARY=$LIBRARY
|
export OMNIVOICE_LIBRARY=$LIBRARY
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
|
|||||||
@@ -74,6 +74,7 @@ libparakeet.so: sources/parakeet.cpp
|
|||||||
cmake -B sources/parakeet.cpp/build-shared -S sources/parakeet.cpp $(CMAKE_ARGS)
|
cmake -B sources/parakeet.cpp/build-shared -S sources/parakeet.cpp $(CMAKE_ARGS)
|
||||||
cmake --build sources/parakeet.cpp/build-shared --config Release -j$(JOBS)
|
cmake --build sources/parakeet.cpp/build-shared --config Release -j$(JOBS)
|
||||||
cp -fv sources/parakeet.cpp/build-shared/libparakeet.so* ./ 2>/dev/null || true
|
cp -fv sources/parakeet.cpp/build-shared/libparakeet.so* ./ 2>/dev/null || true
|
||||||
|
cp -fv sources/parakeet.cpp/build-shared/libparakeet.dylib ./ 2>/dev/null || true
|
||||||
cp -fv sources/parakeet.cpp/include/parakeet_capi.h ./
|
cp -fv sources/parakeet.cpp/include/parakeet_capi.h ./
|
||||||
|
|
||||||
parakeet-cpp-grpc: libparakeet.so main.go goparakeetcpp.go
|
parakeet-cpp-grpc: libparakeet.so main.go goparakeetcpp.go
|
||||||
|
|||||||
@@ -2,15 +2,17 @@ package main
|
|||||||
|
|
||||||
// Started internally by LocalAI - one gRPC server per loaded model.
|
// Started internally by LocalAI - one gRPC server per loaded model.
|
||||||
//
|
//
|
||||||
// Loads libparakeet.so via purego and registers the flat C-API entry
|
// Loads the parakeet shared library via purego and registers the flat
|
||||||
// points declared in parakeet_capi.h. The library name can be overridden
|
// C-API entry points declared in parakeet_capi.h. The library name can be
|
||||||
// with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY / VIBEVOICECPP_LIBRARY
|
// overridden with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY /
|
||||||
// convention in the sibling backends); the default looks for the .so next
|
// VIBEVOICECPP_LIBRARY convention in the sibling backends); the default
|
||||||
// to this binary.
|
// looks next to this binary for libparakeet.so on Linux and
|
||||||
|
// libparakeet.dylib on macOS.
|
||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -28,8 +30,12 @@ type LibFuncs struct {
|
|||||||
func main() {
|
func main() {
|
||||||
libName := os.Getenv("PARAKEET_LIBRARY")
|
libName := os.Getenv("PARAKEET_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "libparakeet.dylib"
|
||||||
|
} else {
|
||||||
libName = "libparakeet.so"
|
libName = "libparakeet.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -16,12 +16,15 @@ mkdir -p "$CURDIR/package/lib"
|
|||||||
cp -avf "$CURDIR/parakeet-cpp-grpc" "$CURDIR/package/"
|
cp -avf "$CURDIR/parakeet-cpp-grpc" "$CURDIR/package/"
|
||||||
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
|
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
|
||||||
|
|
||||||
# libparakeet.so + any soname symlinks (libparakeet.so.X[.Y]). purego.Dlopen
|
# libparakeet shared lib + any soname symlinks. On Linux this is
|
||||||
# resolves it via LD_LIBRARY_PATH, which run.sh points at lib/.
|
# libparakeet.so[.X.Y]; on macOS it is libparakeet.dylib. purego.Dlopen
|
||||||
cp -avf "$CURDIR"/libparakeet.so* "$CURDIR/package/lib/" 2>/dev/null || {
|
# resolves it via the *_LIBRARY_PATH that run.sh points at lib/.
|
||||||
echo "ERROR: libparakeet.so not found in $CURDIR, run 'make' first" >&2
|
cp -avf "$CURDIR"/libparakeet.so* "$CURDIR/package/lib/" 2>/dev/null || true
|
||||||
|
cp -avf "$CURDIR"/libparakeet.dylib "$CURDIR/package/lib/" 2>/dev/null || true
|
||||||
|
if ! ls "$CURDIR"/package/lib/libparakeet.* >/dev/null 2>&1; then
|
||||||
|
echo "ERROR: libparakeet shared library not found in $CURDIR, run 'make' first" >&2
|
||||||
exit 1
|
exit 1
|
||||||
}
|
fi
|
||||||
|
|
||||||
# Detect architecture and copy the core runtime libs libparakeet.so links
|
# Detect architecture and copy the core runtime libs libparakeet.so links
|
||||||
# against, plus the matching dynamic loader as lib/ld.so.
|
# against, plus the matching dynamic loader as lib/ld.so.
|
||||||
@@ -48,7 +51,7 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
|||||||
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 "$CURDIR/package/lib/librt.so.1"
|
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 "$CURDIR/package/lib/librt.so.1"
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 "$CURDIR/package/lib/libpthread.so.0"
|
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 "$CURDIR/package/lib/libpthread.so.0"
|
||||||
elif [ "$(uname -s)" = "Darwin" ]; then
|
elif [ "$(uname -s)" = "Darwin" ]; then
|
||||||
echo "Detected Darwin"
|
echo "Detected Darwin — system frameworks linked dynamically, no bundled libs needed"
|
||||||
else
|
else
|
||||||
echo "Error: Could not detect architecture"
|
echo "Error: Could not detect architecture"
|
||||||
exit 1
|
exit 1
|
||||||
|
|||||||
@@ -3,11 +3,17 @@ set -e
|
|||||||
|
|
||||||
CURDIR=$(dirname "$(realpath "$0")")
|
CURDIR=$(dirname "$(realpath "$0")")
|
||||||
|
|
||||||
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
export DYLD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${DYLD_LIBRARY_PATH:-}"
|
||||||
|
export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.dylib"
|
||||||
|
else
|
||||||
|
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
|
||||||
|
export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.so"
|
||||||
|
fi
|
||||||
|
|
||||||
# If a self-contained ld.so was packaged, route through it so the
|
# If a self-contained ld.so was packaged, route through it so the
|
||||||
# packaged libc / libstdc++ are used instead of the host's (matches the
|
# packaged libc / libstdc++ are used instead of the host's (matches the
|
||||||
# whisper backend's runtime layout).
|
# whisper backend's runtime layout). Linux only.
|
||||||
if [ -f "$CURDIR/lib/ld.so" ]; then
|
if [ -f "$CURDIR/lib/ld.so" ]; then
|
||||||
echo "Using lib/ld.so"
|
echo "Using lib/ld.so"
|
||||||
exec "$CURDIR/lib/ld.so" "$CURDIR/parakeet-cpp-grpc" "$@"
|
exec "$CURDIR/lib/ld.so" "$CURDIR/parakeet-cpp-grpc" "$@"
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# qwentts.cpp version
|
# qwentts.cpp version
|
||||||
QWEN3TTS_REPO?=https://github.com/ServeurpersoCom/qwentts.cpp
|
QWEN3TTS_REPO?=https://github.com/ServeurpersoCom/qwentts.cpp
|
||||||
QWEN3TTS_CPP_VERSION?=4536dcdce27c3764a93a06d6bf64026b124962f5
|
QWEN3TTS_CPP_VERSION?=9dbe7ea26a01b30fccb117ae5e86807c1dc23d42
|
||||||
SO_TARGET?=libgoqwen3ttscpp.so
|
SO_TARGET?=libgoqwen3ttscpp.so
|
||||||
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
@@ -65,8 +65,8 @@ UNAME_S := $(shell uname -s)
|
|||||||
ifeq ($(UNAME_S),Linux)
|
ifeq ($(UNAME_S),Linux)
|
||||||
VARIANT_TARGETS = libgoqwen3ttscpp-avx.so libgoqwen3ttscpp-avx2.so libgoqwen3ttscpp-avx512.so libgoqwen3ttscpp-fallback.so
|
VARIANT_TARGETS = libgoqwen3ttscpp-avx.so libgoqwen3ttscpp-avx2.so libgoqwen3ttscpp-avx512.so libgoqwen3ttscpp-fallback.so
|
||||||
else
|
else
|
||||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
# On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
|
||||||
VARIANT_TARGETS = libgoqwen3ttscpp-fallback.so
|
VARIANT_TARGETS = libgoqwen3ttscpp-fallback.dylib
|
||||||
endif
|
endif
|
||||||
|
|
||||||
qwen3-tts-cpp: main.go goqwen3ttscpp.go $(VARIANT_TARGETS)
|
qwen3-tts-cpp: main.go goqwen3ttscpp.go $(VARIANT_TARGETS)
|
||||||
@@ -78,7 +78,7 @@ package: qwen3-tts-cpp
|
|||||||
build: package
|
build: package
|
||||||
|
|
||||||
clean: purge
|
clean: purge
|
||||||
rm -rf libgoqwen3ttscpp*.so package sources/qwentts.cpp qwen3-tts-cpp
|
rm -rf libgoqwen3ttscpp*.so libgoqwen3ttscpp*.dylib package sources/qwentts.cpp qwen3-tts-cpp
|
||||||
|
|
||||||
purge:
|
purge:
|
||||||
rm -rf build*
|
rm -rf build*
|
||||||
@@ -110,13 +110,20 @@ libgoqwen3ttscpp-fallback.so: sources/qwentts.cpp
|
|||||||
SO_TARGET=libgoqwen3ttscpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom
|
SO_TARGET=libgoqwen3ttscpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom
|
||||||
rm -rf build-libgoqwen3ttscpp-fallback.so
|
rm -rf build-libgoqwen3ttscpp-fallback.so
|
||||||
|
|
||||||
|
# Build fallback variant as a dylib (Darwin)
|
||||||
|
libgoqwen3ttscpp-fallback.dylib: sources/qwentts.cpp
|
||||||
|
$(info ${GREEN}I qwen3-tts-cpp build info:fallback (dylib)${RESET})
|
||||||
|
SO_TARGET=libgoqwen3ttscpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom
|
||||||
|
rm -rf build-libgoqwen3ttscpp-fallback.dylib
|
||||||
|
|
||||||
libgoqwen3ttscpp-custom: CMakeLists.txt cpp/goqwen3ttscpp.cpp cpp/goqwen3ttscpp.h
|
libgoqwen3ttscpp-custom: CMakeLists.txt cpp/goqwen3ttscpp.cpp cpp/goqwen3ttscpp.h
|
||||||
mkdir -p build-$(SO_TARGET) && \
|
mkdir -p build-$(SO_TARGET) && \
|
||||||
cd build-$(SO_TARGET) && \
|
cd build-$(SO_TARGET) && \
|
||||||
cmake .. $(CMAKE_ARGS) && \
|
cmake .. $(CMAKE_ARGS) && \
|
||||||
cmake --build . --config Release -j$(JOBS) --target goqwen3ttscpp && \
|
cmake --build . --config Release -j$(JOBS) --target goqwen3ttscpp && \
|
||||||
cd .. && \
|
cd .. && \
|
||||||
mv build-$(SO_TARGET)/libgoqwen3ttscpp.so ./$(SO_TARGET)
|
(mv build-$(SO_TARGET)/libgoqwen3ttscpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||||
|
mv build-$(SO_TARGET)/libgoqwen3ttscpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||||
|
|
||||||
test: qwen3-tts-cpp
|
test: qwen3-tts-cpp
|
||||||
@echo "Running qwen3-tts-cpp tests..."
|
@echo "Running qwen3-tts-cpp tests..."
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -21,8 +22,12 @@ type LibFuncs struct {
|
|||||||
func main() {
|
func main() {
|
||||||
libName := os.Getenv("QWEN3TTS_LIBRARY")
|
libName := os.Getenv("QWEN3TTS_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "./libgoqwen3ttscpp-fallback.dylib"
|
||||||
|
} else {
|
||||||
libName = "./libgoqwen3ttscpp-fallback.so"
|
libName = "./libgoqwen3ttscpp-fallback.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
|||||||
mkdir -p $CURDIR/package/lib
|
mkdir -p $CURDIR/package/lib
|
||||||
|
|
||||||
cp -avf $CURDIR/qwen3-tts-cpp $CURDIR/package/
|
cp -avf $CURDIR/qwen3-tts-cpp $CURDIR/package/
|
||||||
cp -fv $CURDIR/libgoqwen3ttscpp-*.so $CURDIR/package/
|
cp -fv $CURDIR/libgoqwen3ttscpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||||
|
cp -fv $CURDIR/libgoqwen3ttscpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
# Detect architecture and copy appropriate libraries
|
||||||
|
|||||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
grep -e "flags" /proc/cpuinfo | head -1
|
grep -e "flags" /proc/cpuinfo | head -1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.so"
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS: single dylib variant (Metal or Accelerate)
|
||||||
|
LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.dylib"
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.so"
|
||||||
|
|
||||||
if [ "$(uname)" != "Darwin" ]; then
|
|
||||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||||
echo "CPU: AVX found OK"
|
echo "CPU: AVX found OK"
|
||||||
if [ -e $CURDIR/libgoqwen3ttscpp-avx.so ]; then
|
if [ -e $CURDIR/libgoqwen3ttscpp-avx.so ]; then
|
||||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
LIBRARY="$CURDIR/libgoqwen3ttscpp-avx512.so"
|
LIBRARY="$CURDIR/libgoqwen3ttscpp-avx512.so"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
export QWEN3TTS_LIBRARY=$LIBRARY
|
export QWEN3TTS_LIBRARY=$LIBRARY
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ ifeq ($(UNAME_S),Linux)
|
|||||||
VARIANT_TARGETS = librfdetrcpp-avx.so librfdetrcpp-avx2.so librfdetrcpp-avx512.so librfdetrcpp-fallback.so
|
VARIANT_TARGETS = librfdetrcpp-avx.so librfdetrcpp-avx2.so librfdetrcpp-avx512.so librfdetrcpp-fallback.so
|
||||||
else
|
else
|
||||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||||
VARIANT_TARGETS = librfdetrcpp-fallback.so
|
VARIANT_TARGETS = librfdetrcpp-fallback.dylib
|
||||||
endif
|
endif
|
||||||
|
|
||||||
rfdetr-cpp: main.go gorfdetrcpp.go $(VARIANT_TARGETS)
|
rfdetr-cpp: main.go gorfdetrcpp.go $(VARIANT_TARGETS)
|
||||||
@@ -83,7 +83,7 @@ package: rfdetr-cpp
|
|||||||
build: package
|
build: package
|
||||||
|
|
||||||
clean: purge
|
clean: purge
|
||||||
rm -rf librfdetrcpp*.so rfdetr-cpp package sources
|
rm -rf librfdetrcpp*.so librfdetrcpp*.dylib rfdetr-cpp package sources
|
||||||
|
|
||||||
purge:
|
purge:
|
||||||
rm -rf build*
|
rm -rf build*
|
||||||
@@ -110,11 +110,19 @@ librfdetrcpp-avx512.so: sources/rt-detr.cpp
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
# Build fallback variant (all platforms)
|
# Build fallback variant (all platforms)
|
||||||
|
ifeq ($(UNAME_S),Darwin)
|
||||||
|
librfdetrcpp-fallback.dylib: sources/rt-detr.cpp
|
||||||
|
rm -rfv build-$@
|
||||||
|
$(info ${GREEN}I rfdetr-cpp build info:fallback${RESET})
|
||||||
|
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom
|
||||||
|
rm -rfv build-$@
|
||||||
|
else
|
||||||
librfdetrcpp-fallback.so: sources/rt-detr.cpp
|
librfdetrcpp-fallback.so: sources/rt-detr.cpp
|
||||||
rm -rfv build-$@
|
rm -rfv build-$@
|
||||||
$(info ${GREEN}I rfdetr-cpp build info:fallback${RESET})
|
$(info ${GREEN}I rfdetr-cpp build info:fallback${RESET})
|
||||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom
|
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom
|
||||||
rm -rfv build-$@
|
rm -rfv build-$@
|
||||||
|
endif
|
||||||
|
|
||||||
librfdetrcpp-custom: CMakeLists.txt
|
librfdetrcpp-custom: CMakeLists.txt
|
||||||
mkdir -p build-$(SO_TARGET) && \
|
mkdir -p build-$(SO_TARGET) && \
|
||||||
@@ -122,7 +130,8 @@ librfdetrcpp-custom: CMakeLists.txt
|
|||||||
cmake .. $(CMAKE_ARGS) && \
|
cmake .. $(CMAKE_ARGS) && \
|
||||||
cmake --build . --config Release -j$(JOBS) && \
|
cmake --build . --config Release -j$(JOBS) && \
|
||||||
cd .. && \
|
cd .. && \
|
||||||
mv build-$(SO_TARGET)/librfdetrcpp.so ./$(SO_TARGET)
|
(mv build-$(SO_TARGET)/librfdetrcpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||||
|
mv build-$(SO_TARGET)/librfdetrcpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||||
|
|
||||||
all: rfdetr-cpp package
|
all: rfdetr-cpp package
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -27,8 +28,12 @@ func main() {
|
|||||||
// Get library name from environment variable, default to fallback
|
// Get library name from environment variable, default to fallback
|
||||||
libName := os.Getenv("RFDETR_LIBRARY")
|
libName := os.Getenv("RFDETR_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "./librfdetrcpp-fallback.dylib"
|
||||||
|
} else {
|
||||||
libName = "./librfdetrcpp-fallback.so"
|
libName = "./librfdetrcpp-fallback.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
rfdetrLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
rfdetrLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
|||||||
# Create lib directory
|
# Create lib directory
|
||||||
mkdir -p $CURDIR/package/lib
|
mkdir -p $CURDIR/package/lib
|
||||||
|
|
||||||
cp -avf $CURDIR/librfdetrcpp-*.so $CURDIR/package/
|
cp -fv $CURDIR/librfdetrcpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||||
|
cp -fv $CURDIR/librfdetrcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||||
cp -avf $CURDIR/rfdetr-cpp $CURDIR/package/
|
cp -avf $CURDIR/rfdetr-cpp $CURDIR/package/
|
||||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||||
|
|
||||||
|
|||||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
grep -e "flags" /proc/cpuinfo | head -1
|
grep -e "flags" /proc/cpuinfo | head -1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
LIBRARY="$CURDIR/librfdetrcpp-fallback.so"
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS: single dylib variant (Metal or Accelerate)
|
||||||
|
LIBRARY="$CURDIR/librfdetrcpp-fallback.dylib"
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
LIBRARY="$CURDIR/librfdetrcpp-fallback.so"
|
||||||
|
|
||||||
if [ "$(uname)" != "Darwin" ]; then
|
|
||||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||||
echo "CPU: AVX found OK"
|
echo "CPU: AVX found OK"
|
||||||
if [ -e $CURDIR/librfdetrcpp-avx.so ]; then
|
if [ -e $CURDIR/librfdetrcpp-avx.so ]; then
|
||||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
LIBRARY="$CURDIR/librfdetrcpp-avx512.so"
|
LIBRARY="$CURDIR/librfdetrcpp-avx512.so"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
export RFDETR_LIBRARY=$LIBRARY
|
export RFDETR_LIBRARY=$LIBRARY
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ ifeq ($(UNAME_S),Linux)
|
|||||||
VARIANT_TARGETS = libgosam3-avx.so libgosam3-avx2.so libgosam3-avx512.so libgosam3-fallback.so
|
VARIANT_TARGETS = libgosam3-avx.so libgosam3-avx2.so libgosam3-avx512.so libgosam3-fallback.so
|
||||||
else
|
else
|
||||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||||
VARIANT_TARGETS = libgosam3-fallback.so
|
VARIANT_TARGETS = libgosam3-fallback.dylib
|
||||||
endif
|
endif
|
||||||
|
|
||||||
sam3-cpp: main.go gosam3.go $(VARIANT_TARGETS)
|
sam3-cpp: main.go gosam3.go $(VARIANT_TARGETS)
|
||||||
@@ -78,7 +78,7 @@ package: sam3-cpp
|
|||||||
build: package
|
build: package
|
||||||
|
|
||||||
clean: purge
|
clean: purge
|
||||||
rm -rf libgosam3*.so sam3-cpp package sources
|
rm -rf libgosam3*.so libgosam3*.dylib sam3-cpp package sources
|
||||||
|
|
||||||
purge:
|
purge:
|
||||||
rm -rf build*
|
rm -rf build*
|
||||||
@@ -105,11 +105,19 @@ libgosam3-avx512.so: sources/sam3.cpp
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
# Build fallback variant (all platforms)
|
# Build fallback variant (all platforms)
|
||||||
|
ifeq ($(UNAME_S),Darwin)
|
||||||
|
libgosam3-fallback.dylib: sources/sam3.cpp
|
||||||
|
$(MAKE) purge
|
||||||
|
$(info ${GREEN}I sam3-cpp build info:fallback${RESET})
|
||||||
|
SO_TARGET=libgosam3-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
|
||||||
|
rm -rfv build*
|
||||||
|
else
|
||||||
libgosam3-fallback.so: sources/sam3.cpp
|
libgosam3-fallback.so: sources/sam3.cpp
|
||||||
$(MAKE) purge
|
$(MAKE) purge
|
||||||
$(info ${GREEN}I sam3-cpp build info:fallback${RESET})
|
$(info ${GREEN}I sam3-cpp build info:fallback${RESET})
|
||||||
SO_TARGET=libgosam3-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
|
SO_TARGET=libgosam3-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
|
||||||
rm -rfv build*
|
rm -rfv build*
|
||||||
|
endif
|
||||||
|
|
||||||
libgosam3-custom: CMakeLists.txt cpp/gosam3.cpp cpp/gosam3.h
|
libgosam3-custom: CMakeLists.txt cpp/gosam3.cpp cpp/gosam3.h
|
||||||
mkdir -p build-$(SO_TARGET) && \
|
mkdir -p build-$(SO_TARGET) && \
|
||||||
@@ -117,6 +125,7 @@ libgosam3-custom: CMakeLists.txt cpp/gosam3.cpp cpp/gosam3.h
|
|||||||
cmake .. $(CMAKE_ARGS) && \
|
cmake .. $(CMAKE_ARGS) && \
|
||||||
cmake --build . --config Release -j$(JOBS) && \
|
cmake --build . --config Release -j$(JOBS) && \
|
||||||
cd .. && \
|
cd .. && \
|
||||||
mv build-$(SO_TARGET)/libgosam3.so ./$(SO_TARGET)
|
(mv build-$(SO_TARGET)/libgosam3.so ./$(SO_TARGET) 2>/dev/null || \
|
||||||
|
mv build-$(SO_TARGET)/libgosam3.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||||
|
|
||||||
all: sam3-cpp package
|
all: sam3-cpp package
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -21,8 +22,12 @@ func main() {
|
|||||||
// Get library name from environment variable, default to fallback
|
// Get library name from environment variable, default to fallback
|
||||||
libName := os.Getenv("SAM3_LIBRARY")
|
libName := os.Getenv("SAM3_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "./libgosam3-fallback.dylib"
|
||||||
|
} else {
|
||||||
libName = "./libgosam3-fallback.so"
|
libName = "./libgosam3-fallback.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
gosamLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
gosamLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
|||||||
# Create lib directory
|
# Create lib directory
|
||||||
mkdir -p $CURDIR/package/lib
|
mkdir -p $CURDIR/package/lib
|
||||||
|
|
||||||
cp -avf $CURDIR/libgosam3-*.so $CURDIR/package/
|
cp -fv $CURDIR/libgosam3-*.so $CURDIR/package/ 2>/dev/null || true
|
||||||
|
cp -fv $CURDIR/libgosam3-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||||
cp -avf $CURDIR/sam3-cpp $CURDIR/package/
|
cp -avf $CURDIR/sam3-cpp $CURDIR/package/
|
||||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||||
|
|
||||||
|
|||||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
grep -e "flags" /proc/cpuinfo | head -1
|
grep -e "flags" /proc/cpuinfo | head -1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
LIBRARY="$CURDIR/libgosam3-fallback.so"
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS: single dylib variant (Metal or Accelerate)
|
||||||
|
LIBRARY="$CURDIR/libgosam3-fallback.dylib"
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
LIBRARY="$CURDIR/libgosam3-fallback.so"
|
||||||
|
|
||||||
if [ "$(uname)" != "Darwin" ]; then
|
|
||||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||||
echo "CPU: AVX found OK"
|
echo "CPU: AVX found OK"
|
||||||
if [ -e $CURDIR/libgosam3-avx.so ]; then
|
if [ -e $CURDIR/libgosam3-avx.so ]; then
|
||||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
LIBRARY="$CURDIR/libgosam3-avx512.so"
|
LIBRARY="$CURDIR/libgosam3-avx512.so"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
export SAM3_LIBRARY=$LIBRARY
|
export SAM3_LIBRARY=$LIBRARY
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -238,12 +239,20 @@ func loadSherpaLibs() error {
|
|||||||
func loadSherpaLibsOnce() error {
|
func loadSherpaLibsOnce() error {
|
||||||
shimLib := os.Getenv("SHERPA_SHIM_LIBRARY")
|
shimLib := os.Getenv("SHERPA_SHIM_LIBRARY")
|
||||||
if shimLib == "" {
|
if shimLib == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
shimLib = "libsherpa-shim.dylib"
|
||||||
|
} else {
|
||||||
shimLib = "libsherpa-shim.so"
|
shimLib = "libsherpa-shim.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
capiLib := os.Getenv("SHERPA_ONNX_LIBRARY")
|
capiLib := os.Getenv("SHERPA_ONNX_LIBRARY")
|
||||||
if capiLib == "" {
|
if capiLib == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
capiLib = "libsherpa-onnx-c-api.dylib"
|
||||||
|
} else {
|
||||||
capiLib = "libsherpa-onnx-c-api.so"
|
capiLib = "libsherpa-onnx-c-api.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
shim, err := purego.Dlopen(shimLib, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
shim, err := purego.Dlopen(shimLib, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -3,7 +3,13 @@ set -ex
|
|||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
CURDIR=$(dirname "$(realpath $0)")
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
export SHERPA_SHIM_LIBRARY=$CURDIR/lib/libsherpa-shim.dylib
|
||||||
|
export SHERPA_ONNX_LIBRARY=$CURDIR/lib/libsherpa-onnx-c-api.dylib
|
||||||
|
else
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
|
fi
|
||||||
|
|
||||||
if [ -f $CURDIR/lib/ld.so ]; then
|
if [ -f $CURDIR/lib/ld.so ]; then
|
||||||
echo "Using lib/ld.so"
|
echo "Using lib/ld.so"
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
|||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
# stablediffusion.cpp (ggml)
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||||
STABLEDIFFUSION_GGML_VERSION?=f440ad9c29dd8bc34e5d1f4b863832b96d6ea05f
|
STABLEDIFFUSION_GGML_VERSION?=8caa3f908ae6d4a4bef531e73b9a969f266a3d1f
|
||||||
|
|
||||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||||
|
|
||||||
@@ -131,6 +131,7 @@ libgosd-custom: CMakeLists.txt cpp/gosd.cpp cpp/gosd.h
|
|||||||
cmake .. $(CMAKE_ARGS) && \
|
cmake .. $(CMAKE_ARGS) && \
|
||||||
cmake --build . --config Release -j$(JOBS) && \
|
cmake --build . --config Release -j$(JOBS) && \
|
||||||
cd .. && \
|
cd .. && \
|
||||||
mv build-$(SO_TARGET)/libgosd.so ./$(SO_TARGET)
|
(mv build-$(SO_TARGET)/libgosd.so ./$(SO_TARGET) 2>/dev/null || \
|
||||||
|
mv build-$(SO_TARGET)/libgosd.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||||
|
|
||||||
all: stablediffusion-ggml package
|
all: stablediffusion-ggml package
|
||||||
@@ -3,6 +3,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -21,8 +22,12 @@ func main() {
|
|||||||
// Get library name from environment variable, default to fallback
|
// Get library name from environment variable, default to fallback
|
||||||
libName := os.Getenv("SD_LIBRARY")
|
libName := os.Getenv("SD_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "./libgosd-fallback.dylib"
|
||||||
|
} else {
|
||||||
libName = "./libgosd-fallback.so"
|
libName = "./libgosd-fallback.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ REPO_ROOT="${CURDIR}/../../.."
|
|||||||
mkdir -p $CURDIR/package/lib
|
mkdir -p $CURDIR/package/lib
|
||||||
|
|
||||||
cp -avf $CURDIR/libgosd-*.so $CURDIR/package/
|
cp -avf $CURDIR/libgosd-*.so $CURDIR/package/
|
||||||
|
cp -fv $CURDIR/libgosd-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||||
cp -avf $CURDIR/stablediffusion-ggml $CURDIR/package/
|
cp -avf $CURDIR/stablediffusion-ggml $CURDIR/package/
|
||||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||||
|
|
||||||
|
|||||||
@@ -12,9 +12,18 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
grep -e "flags" /proc/cpuinfo | head -1
|
grep -e "flags" /proc/cpuinfo | head -1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
LIBRARY="$CURDIR/libgosd-fallback.so"
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS: single library variant (Metal or Accelerate). The gosd target is
|
||||||
|
# built as a CMake MODULE, which emits a .dylib for a SHARED build but a
|
||||||
|
# .so for a MODULE build on Apple, so prefer .dylib and fall back to .so.
|
||||||
|
LIBRARY="$CURDIR/libgosd-fallback.dylib"
|
||||||
|
if [ ! -e "$LIBRARY" ]; then
|
||||||
|
LIBRARY="$CURDIR/libgosd-fallback.so"
|
||||||
|
fi
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
LIBRARY="$CURDIR/libgosd-fallback.so"
|
||||||
|
|
||||||
if [ "$(uname)" != "Darwin" ]; then
|
|
||||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||||
echo "CPU: AVX found OK"
|
echo "CPU: AVX found OK"
|
||||||
if [ -e $CURDIR/libgosd-avx.so ]; then
|
if [ -e $CURDIR/libgosd-avx.so ]; then
|
||||||
@@ -36,9 +45,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
LIBRARY="$CURDIR/libgosd-avx512.so"
|
LIBRARY="$CURDIR/libgosd-avx512.so"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
export SD_LIBRARY=$LIBRARY
|
export SD_LIBRARY=$LIBRARY
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
"unicode"
|
"unicode"
|
||||||
@@ -943,9 +944,15 @@ func InitializeONNXRuntime() error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if libPath == "" {
|
if libPath == "" {
|
||||||
|
// LocalAI: default to the platform-native shared library
|
||||||
|
// extension when nothing else is found (dyld vs ld.so).
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libPath = "/usr/local/lib/libonnxruntime.dylib"
|
||||||
|
} else {
|
||||||
libPath = "/usr/local/lib/libonnxruntime.so"
|
libPath = "/usr/local/lib/libonnxruntime.so"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
ort.SetSharedLibraryPath(libPath)
|
ort.SetSharedLibraryPath(libPath)
|
||||||
|
|
||||||
if err := ort.InitializeEnvironment(); err != nil {
|
if err := ort.InitializeEnvironment(); err != nil {
|
||||||
|
|||||||
@@ -32,6 +32,10 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
|||||||
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
||||||
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
||||||
|
elif [ $(uname -s) = "Darwin" ]; then
|
||||||
|
# macOS: dyld resolves the bundled .dylib via DYLD_LIBRARY_PATH (set in
|
||||||
|
# run.sh); there is no ld.so loader nor glibc to bundle.
|
||||||
|
echo "Detected Darwin"
|
||||||
else
|
else
|
||||||
echo "Error: Could not detect architecture"
|
echo "Error: Could not detect architecture"
|
||||||
exit 1
|
exit 1
|
||||||
|
|||||||
@@ -3,12 +3,19 @@ set -ex
|
|||||||
|
|
||||||
CURDIR=$(dirname "$(realpath $0)")
|
CURDIR=$(dirname "$(realpath $0)")
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so
|
# macOS uses dyld: there is no ld.so loader, and the search path env
|
||||||
|
# var is DYLD_LIBRARY_PATH. ONNX Runtime ships as a .dylib here.
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.dylib
|
||||||
|
else
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
|
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so
|
||||||
|
|
||||||
if [ -f $CURDIR/lib/ld.so ]; then
|
if [ -f $CURDIR/lib/ld.so ]; then
|
||||||
echo "Using lib/ld.so"
|
echo "Using lib/ld.so"
|
||||||
exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@"
|
exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@"
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
exec $CURDIR/supertonic "$@"
|
exec $CURDIR/supertonic "$@"
|
||||||
|
|||||||
@@ -70,8 +70,8 @@ UNAME_S := $(shell uname -s)
|
|||||||
ifeq ($(UNAME_S),Linux)
|
ifeq ($(UNAME_S),Linux)
|
||||||
VARIANT_TARGETS = libgovibevoicecpp-avx.so libgovibevoicecpp-avx2.so libgovibevoicecpp-avx512.so libgovibevoicecpp-fallback.so
|
VARIANT_TARGETS = libgovibevoicecpp-avx.so libgovibevoicecpp-avx2.so libgovibevoicecpp-avx512.so libgovibevoicecpp-fallback.so
|
||||||
else
|
else
|
||||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
# On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
|
||||||
VARIANT_TARGETS = libgovibevoicecpp-fallback.so
|
VARIANT_TARGETS = libgovibevoicecpp-fallback.dylib
|
||||||
endif
|
endif
|
||||||
|
|
||||||
vibevoice-cpp: main.go govibevoicecpp.go $(VARIANT_TARGETS)
|
vibevoice-cpp: main.go govibevoicecpp.go $(VARIANT_TARGETS)
|
||||||
@@ -83,7 +83,7 @@ package: vibevoice-cpp
|
|||||||
build: package
|
build: package
|
||||||
|
|
||||||
clean: purge
|
clean: purge
|
||||||
rm -rf libgovibevoicecpp*.so package sources/vibevoice.cpp vibevoice-cpp
|
rm -rf libgovibevoicecpp*.so libgovibevoicecpp*.dylib package sources/vibevoice.cpp vibevoice-cpp
|
||||||
|
|
||||||
purge:
|
purge:
|
||||||
rm -rf build*
|
rm -rf build*
|
||||||
@@ -119,13 +119,21 @@ libgovibevoicecpp-fallback.so: sources/vibevoice.cpp
|
|||||||
SO_TARGET=libgovibevoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgovibevoicecpp-custom
|
SO_TARGET=libgovibevoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgovibevoicecpp-custom
|
||||||
rm -rfv build*
|
rm -rfv build*
|
||||||
|
|
||||||
|
# Build fallback variant as a dylib (Darwin)
|
||||||
|
libgovibevoicecpp-fallback.dylib: sources/vibevoice.cpp
|
||||||
|
$(MAKE) purge
|
||||||
|
$(info ${GREEN}I vibevoice-cpp build info:fallback (dylib)${RESET})
|
||||||
|
SO_TARGET=libgovibevoicecpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgovibevoicecpp-custom
|
||||||
|
rm -rfv build*
|
||||||
|
|
||||||
libgovibevoicecpp-custom: CMakeLists.txt cpp/govibevoicecpp.cpp cpp/govibevoicecpp.h
|
libgovibevoicecpp-custom: CMakeLists.txt cpp/govibevoicecpp.cpp cpp/govibevoicecpp.h
|
||||||
mkdir -p build-$(SO_TARGET) && \
|
mkdir -p build-$(SO_TARGET) && \
|
||||||
cd build-$(SO_TARGET) && \
|
cd build-$(SO_TARGET) && \
|
||||||
cmake .. $(CMAKE_ARGS) && \
|
cmake .. $(CMAKE_ARGS) && \
|
||||||
cmake --build . --config Release -j$(JOBS) --target govibevoicecpp && \
|
cmake --build . --config Release -j$(JOBS) --target govibevoicecpp && \
|
||||||
cd .. && \
|
cd .. && \
|
||||||
mv build-$(SO_TARGET)/libgovibevoicecpp.so ./$(SO_TARGET)
|
(mv build-$(SO_TARGET)/libgovibevoicecpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||||
|
mv build-$(SO_TARGET)/libgovibevoicecpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||||
|
|
||||||
test: vibevoice-cpp
|
test: vibevoice-cpp
|
||||||
@echo "Running vibevoice-cpp tests..."
|
@echo "Running vibevoice-cpp tests..."
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -21,8 +22,12 @@ type LibFuncs struct {
|
|||||||
func main() {
|
func main() {
|
||||||
libName := os.Getenv("VIBEVOICECPP_LIBRARY")
|
libName := os.Getenv("VIBEVOICECPP_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "./libgovibevoicecpp-fallback.dylib"
|
||||||
|
} else {
|
||||||
libName = "./libgovibevoicecpp-fallback.so"
|
libName = "./libgovibevoicecpp-fallback.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
|||||||
mkdir -p $CURDIR/package/lib
|
mkdir -p $CURDIR/package/lib
|
||||||
|
|
||||||
cp -avf $CURDIR/vibevoice-cpp $CURDIR/package/
|
cp -avf $CURDIR/vibevoice-cpp $CURDIR/package/
|
||||||
cp -fv $CURDIR/libgovibevoicecpp-*.so $CURDIR/package/
|
cp -fv $CURDIR/libgovibevoicecpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||||
|
cp -fv $CURDIR/libgovibevoicecpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
# Detect architecture and copy appropriate libraries
|
||||||
|
|||||||
@@ -11,9 +11,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
grep -e "flags" /proc/cpuinfo | head -1
|
grep -e "flags" /proc/cpuinfo | head -1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
LIBRARY="$CURDIR/libgovibevoicecpp-fallback.so"
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS: single dylib variant (Metal or Accelerate)
|
||||||
|
LIBRARY="$CURDIR/libgovibevoicecpp-fallback.dylib"
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
LIBRARY="$CURDIR/libgovibevoicecpp-fallback.so"
|
||||||
|
|
||||||
if [ "$(uname)" != "Darwin" ]; then
|
|
||||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||||
echo "CPU: AVX found OK"
|
echo "CPU: AVX found OK"
|
||||||
if [ -e $CURDIR/libgovibevoicecpp-avx.so ]; then
|
if [ -e $CURDIR/libgovibevoicecpp-avx.so ]; then
|
||||||
@@ -34,9 +38,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
LIBRARY="$CURDIR/libgovibevoicecpp-avx512.so"
|
LIBRARY="$CURDIR/libgovibevoicecpp-avx512.so"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
export VIBEVOICECPP_LIBRARY=$LIBRARY
|
export VIBEVOICECPP_LIBRARY=$LIBRARY
|
||||||
|
|
||||||
if [ -f $CURDIR/lib/ld.so ]; then
|
if [ -f $CURDIR/lib/ld.so ]; then
|
||||||
|
|||||||
@@ -117,6 +117,7 @@ libgowhisper-custom: CMakeLists.txt cpp/gowhisper.cpp cpp/gowhisper.h
|
|||||||
cmake .. $(CMAKE_ARGS) && \
|
cmake .. $(CMAKE_ARGS) && \
|
||||||
cmake --build . --config Release -j$(JOBS) && \
|
cmake --build . --config Release -j$(JOBS) && \
|
||||||
cd .. && \
|
cd .. && \
|
||||||
mv build-$(SO_TARGET)/libgowhisper.so ./$(SO_TARGET)
|
mv build-$(SO_TARGET)/libgowhisper.so ./$(SO_TARGET) 2>/dev/null || \
|
||||||
|
mv build-$(SO_TARGET)/libgowhisper.dylib ./$(SO_TARGET:.so=.dylib)
|
||||||
|
|
||||||
all: whisper package
|
all: whisper package
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/ebitengine/purego"
|
"github.com/ebitengine/purego"
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
@@ -22,8 +23,12 @@ func main() {
|
|||||||
// Get library name from environment variable, default to fallback
|
// Get library name from environment variable, default to fallback
|
||||||
libName := os.Getenv("WHISPER_LIBRARY")
|
libName := os.Getenv("WHISPER_LIBRARY")
|
||||||
if libName == "" {
|
if libName == "" {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
libName = "./libgowhisper-fallback.dylib"
|
||||||
|
} else {
|
||||||
libName = "./libgowhisper-fallback.so"
|
libName = "./libgowhisper-fallback.so"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
|||||||
mkdir -p $CURDIR/package/lib
|
mkdir -p $CURDIR/package/lib
|
||||||
|
|
||||||
cp -avf $CURDIR/whisper $CURDIR/package/
|
cp -avf $CURDIR/whisper $CURDIR/package/
|
||||||
cp -fv $CURDIR/libgowhisper-*.so $CURDIR/package/
|
cp -fv $CURDIR/libgowhisper-*.so $CURDIR/package/ 2>/dev/null || true
|
||||||
|
cp -fv $CURDIR/libgowhisper-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
# Detect architecture and copy appropriate libraries
|
||||||
|
|||||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
grep -e "flags" /proc/cpuinfo | head -1
|
grep -e "flags" /proc/cpuinfo | head -1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
LIBRARY="$CURDIR/libgowhisper-fallback.so"
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS: single dylib variant (Metal or Accelerate)
|
||||||
|
LIBRARY="$CURDIR/libgowhisper-fallback.dylib"
|
||||||
|
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
LIBRARY="$CURDIR/libgowhisper-fallback.so"
|
||||||
|
|
||||||
if [ "$(uname)" != "Darwin" ]; then
|
|
||||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||||
echo "CPU: AVX found OK"
|
echo "CPU: AVX found OK"
|
||||||
if [ -e $CURDIR/libgowhisper-avx.so ]; then
|
if [ -e $CURDIR/libgowhisper-avx.so ]; then
|
||||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
|||||||
LIBRARY="$CURDIR/libgowhisper-avx512.so"
|
LIBRARY="$CURDIR/libgowhisper-avx512.so"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
|
||||||
export WHISPER_LIBRARY=$LIBRARY
|
export WHISPER_LIBRARY=$LIBRARY
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
|
|||||||
@@ -1284,6 +1284,7 @@
|
|||||||
nvidia-cuda-13: "cuda13-liquid-audio"
|
nvidia-cuda-13: "cuda13-liquid-audio"
|
||||||
nvidia-cuda-12: "cuda12-liquid-audio"
|
nvidia-cuda-12: "cuda12-liquid-audio"
|
||||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio"
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio"
|
||||||
|
metal: "metal-liquid-audio"
|
||||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/7_6D7rWrLxp2hb6OHSV1p.png
|
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/7_6D7rWrLxp2hb6OHSV1p.png
|
||||||
- &qwen-tts
|
- &qwen-tts
|
||||||
urls:
|
urls:
|
||||||
@@ -1569,6 +1570,7 @@
|
|||||||
- TTS
|
- TTS
|
||||||
capabilities:
|
capabilities:
|
||||||
default: "cpu-supertonic"
|
default: "cpu-supertonic"
|
||||||
|
metal: "metal-supertonic"
|
||||||
- !!merge <<: *neutts
|
- !!merge <<: *neutts
|
||||||
name: "neutts-development"
|
name: "neutts-development"
|
||||||
capabilities:
|
capabilities:
|
||||||
@@ -4612,6 +4614,7 @@
|
|||||||
nvidia-cuda-13: "cuda13-liquid-audio-development"
|
nvidia-cuda-13: "cuda13-liquid-audio-development"
|
||||||
nvidia-cuda-12: "cuda12-liquid-audio-development"
|
nvidia-cuda-12: "cuda12-liquid-audio-development"
|
||||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio-development"
|
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio-development"
|
||||||
|
metal: "metal-liquid-audio-development"
|
||||||
- !!merge <<: *liquid-audio
|
- !!merge <<: *liquid-audio
|
||||||
name: "cpu-liquid-audio"
|
name: "cpu-liquid-audio"
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-liquid-audio"
|
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-liquid-audio"
|
||||||
@@ -4622,6 +4625,16 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-liquid-audio"
|
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-liquid-audio"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-cpu-liquid-audio
|
- localai/localai-backends:master-cpu-liquid-audio
|
||||||
|
- !!merge <<: *liquid-audio
|
||||||
|
name: "metal-liquid-audio"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-liquid-audio"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-metal-darwin-arm64-liquid-audio
|
||||||
|
- !!merge <<: *liquid-audio
|
||||||
|
name: "metal-liquid-audio-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-liquid-audio"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-metal-darwin-arm64-liquid-audio
|
||||||
- !!merge <<: *liquid-audio
|
- !!merge <<: *liquid-audio
|
||||||
name: "cuda12-liquid-audio"
|
name: "cuda12-liquid-audio"
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-liquid-audio"
|
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-liquid-audio"
|
||||||
@@ -5282,6 +5295,7 @@
|
|||||||
nvidia: "cuda12-trl"
|
nvidia: "cuda12-trl"
|
||||||
nvidia-cuda-12: "cuda12-trl"
|
nvidia-cuda-12: "cuda12-trl"
|
||||||
nvidia-cuda-13: "cuda13-trl"
|
nvidia-cuda-13: "cuda13-trl"
|
||||||
|
metal: "metal-trl"
|
||||||
## TRL backend images
|
## TRL backend images
|
||||||
- !!merge <<: *trl
|
- !!merge <<: *trl
|
||||||
name: "cpu-trl"
|
name: "cpu-trl"
|
||||||
@@ -5313,6 +5327,16 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-trl"
|
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-trl"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-trl
|
- localai/localai-backends:master-gpu-nvidia-cuda-13-trl
|
||||||
|
- !!merge <<: *trl
|
||||||
|
name: "metal-trl"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-trl"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-metal-darwin-arm64-trl
|
||||||
|
- !!merge <<: *trl
|
||||||
|
name: "metal-trl-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-trl"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-metal-darwin-arm64-trl
|
||||||
## llama.cpp quantization backend
|
## llama.cpp quantization backend
|
||||||
- &llama-cpp-quantization
|
- &llama-cpp-quantization
|
||||||
name: "llama-cpp-quantization"
|
name: "llama-cpp-quantization"
|
||||||
@@ -5484,6 +5508,7 @@
|
|||||||
name: "supertonic-development"
|
name: "supertonic-development"
|
||||||
capabilities:
|
capabilities:
|
||||||
default: "cpu-supertonic-development"
|
default: "cpu-supertonic-development"
|
||||||
|
metal: "metal-supertonic-development"
|
||||||
- !!merge <<: *supertonic
|
- !!merge <<: *supertonic
|
||||||
name: "cpu-supertonic"
|
name: "cpu-supertonic"
|
||||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-supertonic"
|
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-supertonic"
|
||||||
@@ -5494,3 +5519,13 @@
|
|||||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-supertonic"
|
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-supertonic"
|
||||||
mirrors:
|
mirrors:
|
||||||
- localai/localai-backends:master-cpu-supertonic
|
- localai/localai-backends:master-cpu-supertonic
|
||||||
|
- !!merge <<: *supertonic
|
||||||
|
name: "metal-supertonic"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-supertonic"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:latest-metal-darwin-arm64-supertonic
|
||||||
|
- !!merge <<: *supertonic
|
||||||
|
name: "metal-supertonic-development"
|
||||||
|
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-supertonic"
|
||||||
|
mirrors:
|
||||||
|
- localai/localai-backends:master-metal-darwin-arm64-supertonic
|
||||||
|
|||||||
@@ -14,5 +14,11 @@ else
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# liquid-audio's torch wheels are large; allow upgrades to satisfy transitive pins
|
# liquid-audio's torch wheels are large; allow upgrades to satisfy transitive pins
|
||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade"
|
||||||
|
# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
|
||||||
|
# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
|
||||||
|
# it on the uv path; Linux/CUDA resolution is unchanged.
|
||||||
|
if [ "x${USE_PIP:-}" != "xtrue" ]; then
|
||||||
|
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
|
||||||
|
fi
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
# MPS (Apple Silicon / Metal) build profile - installed by the darwin CI job.
|
||||||
torch>=2.8.0
|
torch>=2.8.0
|
||||||
torchaudio>=2.8.0
|
torchaudio>=2.8.0
|
||||||
torchcodec>=0.9.1
|
torchcodec>=0.9.1
|
||||||
|
|||||||
@@ -8,7 +8,13 @@ else
|
|||||||
source $backend_dir/../common/libbackend.sh
|
source $backend_dir/../common/libbackend.sh
|
||||||
fi
|
fi
|
||||||
|
|
||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade"
|
||||||
|
# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
|
||||||
|
# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
|
||||||
|
# it when uv is the installer, keeping the Linux/CUDA resolution unchanged.
|
||||||
|
if [ "x${USE_PIP:-}" != "xtrue" ]; then
|
||||||
|
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
|
||||||
|
fi
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|
||||||
# Fetch convert_hf_to_gguf.py and gguf package from the same llama.cpp version
|
# Fetch convert_hf_to_gguf.py and gguf package from the same llama.cpp version
|
||||||
|
|||||||
12
backend/python/trl/requirements-mps.txt
Normal file
12
backend/python/trl/requirements-mps.txt
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
torch==2.10.0
|
||||||
|
trl
|
||||||
|
peft
|
||||||
|
datasets>=3.0.0
|
||||||
|
transformers>=4.56.2
|
||||||
|
accelerate>=1.4.0
|
||||||
|
huggingface-hub>=1.3.0
|
||||||
|
sentencepiece
|
||||||
|
# Note: bitsandbytes is intentionally omitted on MPS. It is only used by the
|
||||||
|
# CUDA (cublas) variants for 8-bit/4-bit quantization and has poor support on
|
||||||
|
# Apple Silicon. torch here uses the plain PyPI wheels, which ship MPS support
|
||||||
|
# on macOS arm64.
|
||||||
@@ -140,7 +140,7 @@ type RunCMD struct {
|
|||||||
OIDCIssuer string `env:"LOCALAI_OIDC_ISSUER" help:"OIDC issuer URL for auto-discovery" group:"auth"`
|
OIDCIssuer string `env:"LOCALAI_OIDC_ISSUER" help:"OIDC issuer URL for auto-discovery" group:"auth"`
|
||||||
OIDCClientID string `env:"LOCALAI_OIDC_CLIENT_ID" help:"OIDC Client ID (auto-enables auth)" group:"auth"`
|
OIDCClientID string `env:"LOCALAI_OIDC_CLIENT_ID" help:"OIDC Client ID (auto-enables auth)" group:"auth"`
|
||||||
OIDCClientSecret string `env:"LOCALAI_OIDC_CLIENT_SECRET" help:"OIDC Client Secret" group:"auth"`
|
OIDCClientSecret string `env:"LOCALAI_OIDC_CLIENT_SECRET" help:"OIDC Client Secret" group:"auth"`
|
||||||
AuthBaseURL string `env:"LOCALAI_BASE_URL" help:"Base URL for OAuth callbacks (e.g. http://localhost:8080)" group:"auth"`
|
ExternalBaseURL string `env:"LOCALAI_BASE_URL" help:"External base URL of this instance (e.g. https://localhost:8080). Used for OAuth callbacks and self-referential links (generated images/videos, job status). When unset, derived from X-Forwarded-Proto/Host or Forwarded headers." group:"api"`
|
||||||
AuthAdminEmail string `env:"LOCALAI_ADMIN_EMAIL" help:"Email address to auto-promote to admin role" group:"auth"`
|
AuthAdminEmail string `env:"LOCALAI_ADMIN_EMAIL" help:"Email address to auto-promote to admin role" group:"auth"`
|
||||||
AuthRegistrationMode string `env:"LOCALAI_REGISTRATION_MODE" default:"open" help:"Registration mode: 'open' (default), 'approval', or 'invite' (invite code required)" group:"auth"`
|
AuthRegistrationMode string `env:"LOCALAI_REGISTRATION_MODE" default:"open" help:"Registration mode: 'open' (default), 'approval', or 'invite' (invite code required)" group:"auth"`
|
||||||
DisableLocalAuth bool `env:"LOCALAI_DISABLE_LOCAL_AUTH" default:"false" help:"Disable local email/password registration and login (use with OAuth/OIDC-only setups)" group:"auth"`
|
DisableLocalAuth bool `env:"LOCALAI_DISABLE_LOCAL_AUTH" default:"false" help:"Disable local email/password registration and login (use with OAuth/OIDC-only setups)" group:"auth"`
|
||||||
@@ -503,9 +503,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
|||||||
opts = append(opts, config.WithAuthOIDCClientID(r.OIDCClientID))
|
opts = append(opts, config.WithAuthOIDCClientID(r.OIDCClientID))
|
||||||
opts = append(opts, config.WithAuthOIDCClientSecret(r.OIDCClientSecret))
|
opts = append(opts, config.WithAuthOIDCClientSecret(r.OIDCClientSecret))
|
||||||
}
|
}
|
||||||
if r.AuthBaseURL != "" {
|
|
||||||
opts = append(opts, config.WithAuthBaseURL(r.AuthBaseURL))
|
|
||||||
}
|
|
||||||
if r.AuthAdminEmail != "" {
|
if r.AuthAdminEmail != "" {
|
||||||
opts = append(opts, config.WithAuthAdminEmail(r.AuthAdminEmail))
|
opts = append(opts, config.WithAuthAdminEmail(r.AuthAdminEmail))
|
||||||
}
|
}
|
||||||
@@ -523,6 +520,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Applied unconditionally: the external base URL governs all self-referential
|
||||||
|
// links (not just OAuth callbacks), so it must take effect even when auth is off.
|
||||||
|
if r.ExternalBaseURL != "" {
|
||||||
|
opts = append(opts, config.WithExternalBaseURL(r.ExternalBaseURL))
|
||||||
|
}
|
||||||
|
|
||||||
if idleWatchDog || busyWatchDog {
|
if idleWatchDog || busyWatchDog {
|
||||||
opts = append(opts, config.EnableWatchDog)
|
opts = append(opts, config.EnableWatchDog)
|
||||||
if idleWatchDog {
|
if idleWatchDog {
|
||||||
|
|||||||
@@ -49,6 +49,13 @@ type ApplicationConfig struct {
|
|||||||
P2PNetworkID string
|
P2PNetworkID string
|
||||||
Federated bool
|
Federated bool
|
||||||
|
|
||||||
|
// ExternalBaseURL is the externally visible base URL of this instance
|
||||||
|
// (scheme+host[:port]), set via LOCALAI_BASE_URL. When non-empty it is
|
||||||
|
// authoritative for every self-referential URL LocalAI emits (OAuth
|
||||||
|
// callbacks, generated image/video links, async job StatusURLs),
|
||||||
|
// overriding proxy-header detection. Empty = derive from request headers.
|
||||||
|
ExternalBaseURL string
|
||||||
|
|
||||||
// DisableStats turns off per-request token tracking. By default the
|
// DisableStats turns off per-request token tracking. By default the
|
||||||
// routing module's billing recorder runs in every mode (including
|
// routing module's billing recorder runs in every mode (including
|
||||||
// no-auth single-user) so dashboards and `/api/usage` are immediately
|
// no-auth single-user) so dashboards and `/api/usage` are immediately
|
||||||
@@ -196,7 +203,6 @@ type AuthConfig struct {
|
|||||||
OIDCIssuer string // OIDC issuer URL for auto-discovery (e.g. https://accounts.google.com)
|
OIDCIssuer string // OIDC issuer URL for auto-discovery (e.g. https://accounts.google.com)
|
||||||
OIDCClientID string
|
OIDCClientID string
|
||||||
OIDCClientSecret string
|
OIDCClientSecret string
|
||||||
BaseURL string // for OAuth callback URLs (e.g. "http://localhost:8080")
|
|
||||||
AdminEmail string // auto-promote to admin on login
|
AdminEmail string // auto-promote to admin on login
|
||||||
RegistrationMode string // "open", "approval" (default when empty), "invite"
|
RegistrationMode string // "open", "approval" (default when empty), "invite"
|
||||||
DisableLocalAuth bool // disable local email/password registration and login
|
DisableLocalAuth bool // disable local email/password registration and login
|
||||||
@@ -950,9 +956,9 @@ func WithAuthGitHubClientSecret(clientSecret string) AppOption {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func WithAuthBaseURL(baseURL string) AppOption {
|
func WithExternalBaseURL(url string) AppOption {
|
||||||
return func(o *ApplicationConfig) {
|
return func(o *ApplicationConfig) {
|
||||||
o.Auth.BaseURL = baseURL
|
o.ExternalBaseURL = url
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -54,8 +54,35 @@ func (g GPU) IsNVIDIABlackwell() bool {
|
|||||||
return maj >= 12
|
return maj >= 12
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compute-buffer headroom guard for the raised physical batch.
|
||||||
|
//
|
||||||
|
// Raising n_ubatch grows the CUDA *compute buffer* (the scratch for the forward
|
||||||
|
// graph), which is allocated PER DEVICE — it does not benefit from a second GPU
|
||||||
|
// the way weights or KV (which are split across devices) do. The buffer scales
|
||||||
|
// ~linearly with n_ubatch * n_ctx, so a large context turns the GB10-tuned
|
||||||
|
// ub2048 into multi-GiB of extra scratch that must fit on a SINGLE card. On a
|
||||||
|
// 16 GiB consumer Blackwell with a 200k context that overflows (issue #10485),
|
||||||
|
// even though the GB10 it was measured on (128 GiB unified memory) had room.
|
||||||
|
//
|
||||||
|
// These constants size a conservative guard: only raise the batch when the
|
||||||
|
// extra scratch fits the per-device VRAM ceiling.
|
||||||
|
const (
|
||||||
|
// computeBufferBytesPerCell approximates the CUDA compute-buffer cost of one
|
||||||
|
// (n_ubatch * n_ctx) cell. Derived from an observed allocation (ub2048 *
|
||||||
|
// ctx204800 ~= 4.5 GiB => ~11 B/cell) and rounded up to 16 for margin, since
|
||||||
|
// the real cost also grows with model width (heads / embedding dim) which we
|
||||||
|
// don't know at config time.
|
||||||
|
computeBufferBytesPerCell = 16
|
||||||
|
// blackwellBatchHeadroomDivisor caps the extra compute buffer from raising the
|
||||||
|
// physical batch at VRAM/divisor. /4 keeps the bulk of a device for weights +
|
||||||
|
// KV, which already dominate VRAM use.
|
||||||
|
blackwellBatchHeadroomDivisor = 4
|
||||||
|
)
|
||||||
|
|
||||||
// PhysicalBatch returns the canonical physical batch (n_batch/n_ubatch) for the
|
// PhysicalBatch returns the canonical physical batch (n_batch/n_ubatch) for the
|
||||||
// given hardware, used when the model config leaves batch unset.
|
// given hardware class, ignoring context/VRAM headroom. Use
|
||||||
|
// PhysicalBatchForContext when a model context and per-device VRAM are known
|
||||||
|
// (the load paths) so the raised batch can't overflow a single device.
|
||||||
func PhysicalBatch(g GPU) int {
|
func PhysicalBatch(g GPU) int {
|
||||||
if g.IsNVIDIABlackwell() {
|
if g.IsNVIDIABlackwell() {
|
||||||
return BlackwellPhysicalBatch
|
return BlackwellPhysicalBatch
|
||||||
@@ -63,6 +90,32 @@ func PhysicalBatch(g GPU) int {
|
|||||||
return DefaultPhysicalBatch
|
return DefaultPhysicalBatch
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PhysicalBatchForContext is PhysicalBatch gated on per-device VRAM headroom for
|
||||||
|
// the given context: it only raises the batch above the conservative default
|
||||||
|
// when the extra compute buffer (which is allocated on a single device and grows
|
||||||
|
// with n_ubatch * n_ctx) fits within 1/blackwellBatchHeadroomDivisor of the GPU's
|
||||||
|
// VRAM. g.VRAM must be the PER-DEVICE ceiling (the smallest device on a
|
||||||
|
// multi-GPU host), not the summed total — the compute buffer can't be split.
|
||||||
|
//
|
||||||
|
// VRAM 0 (unknown) stays conservative rather than risk a per-device OOM; the
|
||||||
|
// GB10 / unified-memory path reports system RAM, so it still clears the guard.
|
||||||
|
func PhysicalBatchForContext(g GPU, ctx int) int {
|
||||||
|
if !g.IsNVIDIABlackwell() {
|
||||||
|
return DefaultPhysicalBatch
|
||||||
|
}
|
||||||
|
if ctx <= 0 {
|
||||||
|
ctx = DefaultContextSize
|
||||||
|
}
|
||||||
|
if g.VRAM == 0 {
|
||||||
|
return DefaultPhysicalBatch
|
||||||
|
}
|
||||||
|
extra := uint64(ctx) * uint64(BlackwellPhysicalBatch-DefaultPhysicalBatch) * computeBufferBytesPerCell
|
||||||
|
if extra <= g.VRAM/blackwellBatchHeadroomDivisor {
|
||||||
|
return BlackwellPhysicalBatch
|
||||||
|
}
|
||||||
|
return DefaultPhysicalBatch
|
||||||
|
}
|
||||||
|
|
||||||
// IsManagedPhysicalBatch reports whether n is a value PhysicalBatch assigns.
|
// IsManagedPhysicalBatch reports whether n is a value PhysicalBatch assigns.
|
||||||
// Callers that re-tune a value chosen by an upstream host (the distributed
|
// Callers that re-tune a value chosen by an upstream host (the distributed
|
||||||
// router correcting the frontend's guess) use this to avoid clobbering an
|
// router correcting the frontend's guess) use this to avoid clobbering an
|
||||||
@@ -122,7 +175,12 @@ func hasParallelOption(opts []string) bool {
|
|||||||
// deterministic device — detection does a live nvidia-smi call.
|
// deterministic device — detection does a live nvidia-smi call.
|
||||||
var localGPU = func() GPU {
|
var localGPU = func() GPU {
|
||||||
vendor, _ := xsysinfo.DetectGPUVendor()
|
vendor, _ := xsysinfo.DetectGPUVendor()
|
||||||
vram, _ := xsysinfo.TotalAvailableVRAM()
|
// Use the SMALLEST device's VRAM, not the summed total: the parallel-slot
|
||||||
|
// tier and the batch headroom guard both reason about what fits on a single
|
||||||
|
// card, and per-device compute buffers can't be split across GPUs. Summing
|
||||||
|
// two 16 GiB cards into "32 GiB" is what over-provisioned multi-GPU hosts
|
||||||
|
// into OOM (issue #10485).
|
||||||
|
vram, _ := xsysinfo.MinPerGPUVRAM()
|
||||||
return GPU{
|
return GPU{
|
||||||
Vendor: vendor,
|
Vendor: vendor,
|
||||||
ComputeCapability: xsysinfo.NVIDIAComputeCapability(),
|
ComputeCapability: xsysinfo.NVIDIAComputeCapability(),
|
||||||
@@ -137,10 +195,20 @@ func ApplyHardwareDefaults(cfg *ModelConfig, gpu GPU) {
|
|||||||
if cfg == nil {
|
if cfg == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if cfg.Batch == 0 && gpu.IsNVIDIABlackwell() {
|
// Raise the physical batch on Blackwell only when the resulting compute
|
||||||
|
// buffer fits the per-device VRAM at THIS model's context. Leaving Batch at 0
|
||||||
|
// (rather than writing the default 512) preserves the downstream single-pass
|
||||||
|
// sizing in core/backend.EffectiveBatchSize for embedding/score/rerank.
|
||||||
|
if cfg.Batch == 0 {
|
||||||
|
ctx := DefaultContextSize
|
||||||
|
if cfg.ContextSize != nil {
|
||||||
|
ctx = *cfg.ContextSize
|
||||||
|
}
|
||||||
|
if PhysicalBatchForContext(gpu, ctx) == BlackwellPhysicalBatch {
|
||||||
cfg.Batch = BlackwellPhysicalBatch
|
cfg.Batch = BlackwellPhysicalBatch
|
||||||
xlog.Debug("[hardware_defaults] Blackwell GPU: defaulting physical batch",
|
xlog.Debug("[hardware_defaults] Blackwell GPU: defaulting physical batch",
|
||||||
"batch", cfg.Batch, "compute_cap", gpu.ComputeCapability)
|
"batch", cfg.Batch, "compute_cap", gpu.ComputeCapability, "context", ctx, "vram_gib", gpu.VRAM>>30)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable concurrent serving by default on a capable GPU: without this the
|
// Enable concurrent serving by default on a capable GPU: without this the
|
||||||
|
|||||||
@@ -9,26 +9,37 @@ import (
|
|||||||
// GPU. The detection seam (localGPU) is injected so the path is deterministic
|
// GPU. The detection seam (localGPU) is injected so the path is deterministic
|
||||||
// without a real GPU.
|
// without a real GPU.
|
||||||
var _ = Describe("SetDefaults hardware defaults (single-instance)", func() {
|
var _ = Describe("SetDefaults hardware defaults (single-instance)", func() {
|
||||||
|
const gib = uint64(1) << 30
|
||||||
|
|
||||||
var orig func() GPU
|
var orig func() GPU
|
||||||
BeforeEach(func() { orig = localGPU })
|
BeforeEach(func() { orig = localGPU })
|
||||||
AfterEach(func() { localGPU = orig })
|
AfterEach(func() { localGPU = orig })
|
||||||
|
|
||||||
It("sets the physical batch on a local Blackwell GPU", func() {
|
It("sets the physical batch on a local Blackwell GPU with headroom", func() {
|
||||||
localGPU = func() GPU { return GPU{ComputeCapability: "12.1"} }
|
localGPU = func() GPU { return GPU{ComputeCapability: "12.1", VRAM: 119 * gib} }
|
||||||
cfg := &ModelConfig{}
|
cfg := &ModelConfig{}
|
||||||
cfg.SetDefaults()
|
cfg.SetDefaults()
|
||||||
Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
|
Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("leaves batch unset when a large context would overflow the device", func() {
|
||||||
|
// Regression guard for issue #10485: 16 GiB consumer Blackwell + ~200k ctx.
|
||||||
|
localGPU = func() GPU { return GPU{ComputeCapability: "12.0", VRAM: 16 * gib} }
|
||||||
|
ctx := 204800
|
||||||
|
cfg := &ModelConfig{LLMConfig: LLMConfig{ContextSize: &ctx}}
|
||||||
|
cfg.SetDefaults()
|
||||||
|
Expect(cfg.Batch).To(Equal(0))
|
||||||
|
})
|
||||||
|
|
||||||
It("leaves batch unset on a non-Blackwell local GPU", func() {
|
It("leaves batch unset on a non-Blackwell local GPU", func() {
|
||||||
localGPU = func() GPU { return GPU{ComputeCapability: "8.9"} }
|
localGPU = func() GPU { return GPU{ComputeCapability: "8.9", VRAM: 119 * gib} }
|
||||||
cfg := &ModelConfig{}
|
cfg := &ModelConfig{}
|
||||||
cfg.SetDefaults()
|
cfg.SetDefaults()
|
||||||
Expect(cfg.Batch).To(Equal(0))
|
Expect(cfg.Batch).To(Equal(0))
|
||||||
})
|
})
|
||||||
|
|
||||||
It("never overrides an explicit batch", func() {
|
It("never overrides an explicit batch", func() {
|
||||||
localGPU = func() GPU { return GPU{ComputeCapability: "12.1"} }
|
localGPU = func() GPU { return GPU{ComputeCapability: "12.1", VRAM: 119 * gib} }
|
||||||
cfg := &ModelConfig{}
|
cfg := &ModelConfig{}
|
||||||
cfg.Batch = 1024
|
cfg.Batch = 1024
|
||||||
cfg.SetDefaults()
|
cfg.SetDefaults()
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var _ = Describe("Hardware-driven config defaults", func() {
|
var _ = Describe("Hardware-driven config defaults", func() {
|
||||||
|
const gib = uint64(1) << 30
|
||||||
|
|
||||||
DescribeTable("GPU.IsNVIDIABlackwell (sm_12x consumer family)",
|
DescribeTable("GPU.IsNVIDIABlackwell (sm_12x consumer family)",
|
||||||
func(cc string, want bool) {
|
func(cc string, want bool) {
|
||||||
Expect(GPU{ComputeCapability: cc}.IsNVIDIABlackwell()).To(Equal(want))
|
Expect(GPU{ComputeCapability: cc}.IsNVIDIABlackwell()).To(Equal(want))
|
||||||
@@ -35,21 +37,54 @@ var _ = Describe("Hardware-driven config defaults", func() {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Describe("PhysicalBatchForContext (per-device VRAM headroom)", func() {
|
||||||
|
It("raises the batch when the compute buffer fits the device", func() {
|
||||||
|
// 16 GiB Blackwell with a small context: the extra scratch is tiny.
|
||||||
|
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.0", VRAM: 16 * gib}, 8192)).
|
||||||
|
To(Equal(BlackwellPhysicalBatch))
|
||||||
|
})
|
||||||
|
It("keeps the default batch when a large context would overflow one device", func() {
|
||||||
|
// The issue #10485 case: 16 GiB consumer Blackwell, ~200k context.
|
||||||
|
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.0", VRAM: 16 * gib}, 204800)).
|
||||||
|
To(Equal(DefaultPhysicalBatch))
|
||||||
|
})
|
||||||
|
It("still raises the batch on a large unified-memory device (GB10)", func() {
|
||||||
|
// GB10 reports system RAM (~119 GiB) as its single device's VRAM.
|
||||||
|
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.1", VRAM: 119 * gib}, 204800)).
|
||||||
|
To(Equal(BlackwellPhysicalBatch))
|
||||||
|
})
|
||||||
|
It("stays conservative when VRAM is unknown", func() {
|
||||||
|
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.1"}, 8192)).
|
||||||
|
To(Equal(DefaultPhysicalBatch))
|
||||||
|
})
|
||||||
|
It("never raises the batch on non-Blackwell", func() {
|
||||||
|
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "9.0", VRAM: 80 * gib}, 8192)).
|
||||||
|
To(Equal(DefaultPhysicalBatch))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
Describe("ApplyHardwareDefaults", func() {
|
Describe("ApplyHardwareDefaults", func() {
|
||||||
It("raises an unset batch to 2048 on Blackwell", func() {
|
It("raises an unset batch to 2048 on Blackwell with headroom", func() {
|
||||||
cfg := &ModelConfig{}
|
cfg := &ModelConfig{}
|
||||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"})
|
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1", VRAM: 119 * gib})
|
||||||
Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
|
Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
|
||||||
})
|
})
|
||||||
|
It("leaves batch unset when a large context would overflow one device", func() {
|
||||||
|
// Regression guard for issue #10485: 16 GiB card + ~200k context.
|
||||||
|
ctx := 204800
|
||||||
|
cfg := &ModelConfig{LLMConfig: LLMConfig{ContextSize: &ctx}}
|
||||||
|
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.0", VRAM: 16 * gib})
|
||||||
|
Expect(cfg.Batch).To(Equal(0))
|
||||||
|
})
|
||||||
It("leaves batch unset on non-Blackwell", func() {
|
It("leaves batch unset on non-Blackwell", func() {
|
||||||
cfg := &ModelConfig{}
|
cfg := &ModelConfig{}
|
||||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "9.0"})
|
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "9.0", VRAM: 119 * gib})
|
||||||
Expect(cfg.Batch).To(Equal(0))
|
Expect(cfg.Batch).To(Equal(0))
|
||||||
})
|
})
|
||||||
It("never overrides an explicit batch", func() {
|
It("never overrides an explicit batch", func() {
|
||||||
cfg := &ModelConfig{}
|
cfg := &ModelConfig{}
|
||||||
cfg.Batch = 1024
|
cfg.Batch = 1024
|
||||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"})
|
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1", VRAM: 119 * gib})
|
||||||
Expect(cfg.Batch).To(Equal(1024))
|
Expect(cfg.Batch).To(Equal(1024))
|
||||||
})
|
})
|
||||||
It("no-ops on nil", func() {
|
It("no-ops on nil", func() {
|
||||||
@@ -57,8 +92,6 @@ var _ = Describe("Hardware-driven config defaults", func() {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
const gib = uint64(1) << 30
|
|
||||||
|
|
||||||
DescribeTable("DefaultParallelSlots (by VRAM)",
|
DescribeTable("DefaultParallelSlots (by VRAM)",
|
||||||
func(vramGiB uint64, want int) {
|
func(vramGiB uint64, want int) {
|
||||||
Expect(DefaultParallelSlots(GPU{VRAM: vramGiB * gib})).To(Equal(want))
|
Expect(DefaultParallelSlots(GPU{VRAM: vramGiB * gib})).To(Equal(want))
|
||||||
|
|||||||
@@ -1204,11 +1204,6 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
|||||||
// This ensures gallery-installed and runtime-loaded models get optimal parameters.
|
// This ensures gallery-installed and runtime-loaded models get optimal parameters.
|
||||||
ApplyInferenceDefaults(cfg, cfg.Name, cfg.Model)
|
ApplyInferenceDefaults(cfg, cfg.Name, cfg.Model)
|
||||||
|
|
||||||
// Apply hardware-driven defaults (e.g. a larger physical batch on Blackwell).
|
|
||||||
// Uses the local GPU here; in distributed mode the router re-applies the same
|
|
||||||
// heuristics for the selected node's GPU before loading. Explicit config wins.
|
|
||||||
ApplyHardwareDefaults(cfg, localGPU())
|
|
||||||
|
|
||||||
// Apply serving-policy defaults (device-independent): cross-request prefix
|
// Apply serving-policy defaults (device-independent): cross-request prefix
|
||||||
// caching. Propagates to distributed nodes via the model options.
|
// caching. Propagates to distributed nodes via the model options.
|
||||||
ApplyServingDefaults(cfg)
|
ApplyServingDefaults(cfg)
|
||||||
@@ -1247,6 +1242,16 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
|||||||
cfg.ContextSize = &ctx
|
cfg.ContextSize = &ctx
|
||||||
}
|
}
|
||||||
runBackendHooks(cfg, lo.modelPath)
|
runBackendHooks(cfg, lo.modelPath)
|
||||||
|
|
||||||
|
// Apply hardware-driven defaults (e.g. a larger physical batch on Blackwell)
|
||||||
|
// LAST, after the context size is fully resolved (explicit config, LoadOptions,
|
||||||
|
// then the GGUF guess inside runBackendHooks): the Blackwell batch guard sizes
|
||||||
|
// the per-device compute buffer against this model's context, so it must see
|
||||||
|
// the final value, not a pre-guess nil. Uses the local GPU here; in distributed
|
||||||
|
// mode the router re-applies the same heuristics for the selected node's GPU
|
||||||
|
// before loading. Explicit config always wins.
|
||||||
|
ApplyHardwareDefaults(cfg, localGPU())
|
||||||
|
|
||||||
cfg.syncKnownUsecasesFromString()
|
cfg.syncKnownUsecasesFromString()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -149,6 +149,18 @@ func API(application *application.Application) (*echo.Echo, error) {
|
|||||||
// Middleware - StripPathPrefix must be registered early as it uses Rewrite which runs before routing
|
// Middleware - StripPathPrefix must be registered early as it uses Rewrite which runs before routing
|
||||||
e.Pre(httpMiddleware.StripPathPrefix())
|
e.Pre(httpMiddleware.StripPathPrefix())
|
||||||
|
|
||||||
|
// Stamp the configured external base URL into each request context so
|
||||||
|
// middleware.BaseURL can treat it as authoritative for self-referential
|
||||||
|
// links. Registered as Pre so it runs before routing and handlers.
|
||||||
|
if extBaseURL := application.ApplicationConfig().ExternalBaseURL; extBaseURL != "" {
|
||||||
|
e.Pre(func(next echo.HandlerFunc) echo.HandlerFunc {
|
||||||
|
return func(c echo.Context) error {
|
||||||
|
c.Set("_external_base_url", extBaseURL)
|
||||||
|
return next(c)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
e.Pre(middleware.RemoveTrailingSlash())
|
e.Pre(middleware.RemoveTrailingSlash())
|
||||||
|
|
||||||
if application.ApplicationConfig().MachineTag != "" {
|
if application.ApplicationConfig().MachineTag != "" {
|
||||||
|
|||||||
@@ -55,17 +55,70 @@ func BasePathPrefix(c echo.Context) string {
|
|||||||
// The returned URL is guaranteed to end with `/`.
|
// The returned URL is guaranteed to end with `/`.
|
||||||
// The method should be used in conjunction with the StripPathPrefix middleware.
|
// The method should be used in conjunction with the StripPathPrefix middleware.
|
||||||
func BaseURL(c echo.Context) string {
|
func BaseURL(c echo.Context) string {
|
||||||
|
// An explicit external base URL (LOCALAI_BASE_URL) is authoritative for
|
||||||
|
// the origin. The proxy-derived path prefix is still appended so a
|
||||||
|
// reverse-proxy mount point keeps working. Trailing slashes are
|
||||||
|
// normalized via BasePathPrefix, which always starts and ends with "/".
|
||||||
|
if ext, ok := c.Get("_external_base_url").(string); ok && ext != "" {
|
||||||
|
return strings.TrimRight(ext, "/") + BasePathPrefix(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
fwdProto, fwdHost := parseForwarded(c.Request().Header.Get("Forwarded"))
|
||||||
|
|
||||||
scheme := "http"
|
scheme := "http"
|
||||||
if c.Request().Header.Get("X-Forwarded-Proto") == "https" {
|
switch {
|
||||||
|
case c.Request().TLS != nil:
|
||||||
scheme = "https"
|
scheme = "https"
|
||||||
} else if c.Request().TLS != nil {
|
case strings.EqualFold(firstToken(c.Request().Header.Get("X-Forwarded-Proto")), "https"):
|
||||||
|
scheme = "https"
|
||||||
|
case strings.EqualFold(fwdProto, "https"):
|
||||||
scheme = "https"
|
scheme = "https"
|
||||||
}
|
}
|
||||||
|
|
||||||
host := c.Request().Host
|
host := c.Request().Host
|
||||||
if forwardedHost := c.Request().Header.Get("X-Forwarded-Host"); forwardedHost != "" {
|
if forwardedHost := c.Request().Header.Get("X-Forwarded-Host"); forwardedHost != "" {
|
||||||
host = forwardedHost
|
host = forwardedHost
|
||||||
|
} else if fwdHost != "" {
|
||||||
|
host = fwdHost
|
||||||
}
|
}
|
||||||
|
|
||||||
return scheme + "://" + host + BasePathPrefix(c)
|
return scheme + "://" + host + BasePathPrefix(c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// firstToken returns the first comma-separated token of v, trimmed of spaces.
|
||||||
|
// Reverse-proxy chains can emit X-Forwarded-Proto as "https,http"; only the
|
||||||
|
// first hop (closest to the client) is meaningful for scheme detection.
|
||||||
|
func firstToken(v string) string {
|
||||||
|
if i := strings.IndexByte(v, ','); i >= 0 {
|
||||||
|
v = v[:i]
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseForwarded extracts the proto and host directives from the first element
|
||||||
|
// of an RFC 7239 Forwarded header (e.g. `for=x;proto=https;host=h, for=y`).
|
||||||
|
// Values may be quoted. Returns empty strings when absent or malformed so the
|
||||||
|
// caller can fall through to other signals.
|
||||||
|
func parseForwarded(header string) (proto, host string) {
|
||||||
|
if header == "" {
|
||||||
|
return "", ""
|
||||||
|
}
|
||||||
|
// Only the first element (closest proxy to the client) matters here.
|
||||||
|
if i := strings.IndexByte(header, ','); i >= 0 {
|
||||||
|
header = header[:i]
|
||||||
|
}
|
||||||
|
for _, directive := range strings.Split(header, ";") {
|
||||||
|
key, value, ok := strings.Cut(strings.TrimSpace(directive), "=")
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
value = strings.Trim(strings.TrimSpace(value), `"`)
|
||||||
|
switch strings.ToLower(strings.TrimSpace(key)) {
|
||||||
|
case "proto":
|
||||||
|
proto = value
|
||||||
|
case "host":
|
||||||
|
host = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return proto, host
|
||||||
|
}
|
||||||
|
|||||||
@@ -135,4 +135,138 @@ var _ = Describe("BaseURL", func() {
|
|||||||
Entry("missing leading slash", "evil"),
|
Entry("missing leading slash", "evil"),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Context("scheme detection hardening", func() {
|
||||||
|
It("treats comma-separated X-Forwarded-Proto as https when first token is https", func() {
|
||||||
|
app := echo.New()
|
||||||
|
actualURL := ""
|
||||||
|
app.GET("/x", func(c echo.Context) error {
|
||||||
|
actualURL = BaseURL(c)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
req := httptest.NewRequest("GET", "/x", nil)
|
||||||
|
req.Header.Set("X-Forwarded-Proto", "https,http")
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
app.ServeHTTP(rec, req)
|
||||||
|
Expect(actualURL).To(Equal("https://example.com/"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("derives https from the RFC 7239 Forwarded proto directive", func() {
|
||||||
|
app := echo.New()
|
||||||
|
actualURL := ""
|
||||||
|
app.GET("/x", func(c echo.Context) error {
|
||||||
|
actualURL = BaseURL(c)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
req := httptest.NewRequest("GET", "/x", nil)
|
||||||
|
req.Header.Set("Forwarded", "for=192.0.2.1;proto=https;host=proxy.example")
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
app.ServeHTTP(rec, req)
|
||||||
|
Expect(actualURL).To(Equal("https://proxy.example/"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("prefers X-Forwarded-Host over the Forwarded host directive", func() {
|
||||||
|
app := echo.New()
|
||||||
|
actualURL := ""
|
||||||
|
app.GET("/x", func(c echo.Context) error {
|
||||||
|
actualURL = BaseURL(c)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
req := httptest.NewRequest("GET", "/x", nil)
|
||||||
|
req.Header.Set("X-Forwarded-Host", "xfh.example")
|
||||||
|
req.Header.Set("Forwarded", "host=fwd.example;proto=https")
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
app.ServeHTTP(rec, req)
|
||||||
|
Expect(actualURL).To(Equal("https://xfh.example/"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Context("explicit external base URL override", func() {
|
||||||
|
It("uses the configured origin over conflicting forwarded headers", func() {
|
||||||
|
app := echo.New()
|
||||||
|
actualURL := ""
|
||||||
|
app.GET("/x", func(c echo.Context) error {
|
||||||
|
c.Set("_external_base_url", "https://192.168.0.13:34567")
|
||||||
|
actualURL = BaseURL(c)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
req := httptest.NewRequest("GET", "/x", nil)
|
||||||
|
req.Header.Set("X-Forwarded-Proto", "http")
|
||||||
|
req.Header.Set("X-Forwarded-Host", "internal:8080")
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
app.ServeHTTP(rec, req)
|
||||||
|
Expect(actualURL).To(Equal("https://192.168.0.13:34567/"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("combines the configured origin with a detected path prefix", func() {
|
||||||
|
app := echo.New()
|
||||||
|
actualURL := ""
|
||||||
|
app.GET("/hello", func(c echo.Context) error {
|
||||||
|
c.Set("_original_path", "/localai/hello")
|
||||||
|
c.Set("_external_base_url", "https://ext.example")
|
||||||
|
actualURL = BaseURL(c)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
req := httptest.NewRequest("GET", "/hello", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
app.ServeHTTP(rec, req)
|
||||||
|
Expect(actualURL).To(Equal("https://ext.example/localai/"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("ignores an empty override", func() {
|
||||||
|
app := echo.New()
|
||||||
|
actualURL := ""
|
||||||
|
app.GET("/x", func(c echo.Context) error {
|
||||||
|
c.Set("_external_base_url", "")
|
||||||
|
actualURL = BaseURL(c)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
req := httptest.NewRequest("GET", "/x", nil)
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
app.ServeHTTP(rec, req)
|
||||||
|
Expect(actualURL).To(Equal("http://example.com/"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Context("parseForwarded helper", func() {
|
||||||
|
It("parses unquoted proto and host", func() {
|
||||||
|
proto, host := parseForwarded("for=192.0.2.1;proto=https;host=h.example")
|
||||||
|
Expect(proto).To(Equal("https"))
|
||||||
|
Expect(host).To(Equal("h.example"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("strips quotes around values", func() {
|
||||||
|
proto, host := parseForwarded(`proto="https";host="h.example"`)
|
||||||
|
Expect(proto).To(Equal("https"))
|
||||||
|
Expect(host).To(Equal("h.example"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("uses only the first element of a multi-element header", func() {
|
||||||
|
proto, host := parseForwarded("proto=https;host=first.example, proto=http;host=second.example")
|
||||||
|
Expect(proto).To(Equal("https"))
|
||||||
|
Expect(host).To(Equal("first.example"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns empty strings for an empty header", func() {
|
||||||
|
proto, host := parseForwarded("")
|
||||||
|
Expect(proto).To(BeEmpty())
|
||||||
|
Expect(host).To(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("skips directives without a value", func() {
|
||||||
|
proto, host := parseForwarded("proto;host=h.example")
|
||||||
|
Expect(proto).To(BeEmpty())
|
||||||
|
Expect(host).To(Equal("h.example"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
Context("firstToken helper", func() {
|
||||||
|
It("returns the whole trimmed string when there is no comma", func() {
|
||||||
|
Expect(firstToken(" https ")).To(Equal("https"))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns the first trimmed token when there is a comma", func() {
|
||||||
|
Expect(firstToken("https , http")).To(Equal("https"))
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -86,6 +86,7 @@
|
|||||||
"input": {
|
"input": {
|
||||||
"placeholder": "Message...",
|
"placeholder": "Message...",
|
||||||
"attachFile": "Attach file",
|
"attachFile": "Attach file",
|
||||||
|
"send": "Send message",
|
||||||
"stopGenerating": "Stop generating",
|
"stopGenerating": "Stop generating",
|
||||||
"canvasTitle": "Canvas — extract code blocks and media into a side panel for preview, copy, and download",
|
"canvasTitle": "Canvas — extract code blocks and media into a side panel for preview, copy, and download",
|
||||||
"canvasLabel": "Canvas",
|
"canvasLabel": "Canvas",
|
||||||
|
|||||||
@@ -77,6 +77,21 @@
|
|||||||
"noModelsTitle": "No Models Available",
|
"noModelsTitle": "No Models Available",
|
||||||
"noModelsBody": "There are no models installed yet. Ask your administrator to set up models so you can start chatting."
|
"noModelsBody": "There are no models installed yet. Ask your administrator to set up models so you can start chatting."
|
||||||
},
|
},
|
||||||
|
"starters": {
|
||||||
|
"title": "Recommended for your hardware",
|
||||||
|
"tier": {
|
||||||
|
"cpu": "CPU-only",
|
||||||
|
"gpu-small": "GPU",
|
||||||
|
"gpu-mid": "GPU",
|
||||||
|
"gpu-large": "GPU"
|
||||||
|
},
|
||||||
|
"cpuNote": "No GPU detected — these small models stay responsive on CPU.",
|
||||||
|
"gpuNote": "Picked to fit your available VRAM with room for context.",
|
||||||
|
"install": "Install",
|
||||||
|
"installing": "Installing",
|
||||||
|
"installStarted": "Installing {{model}}…",
|
||||||
|
"installFailed": "Install failed: {{message}}"
|
||||||
|
},
|
||||||
"connect": {
|
"connect": {
|
||||||
"title": "One endpoint, every API",
|
"title": "One endpoint, every API",
|
||||||
"subtitle": "LocalAI serves its own full API — image & video generation, depth, object detection, reranking, audio, face & voice recognition, and realtime voice over WebRTC and WebSocket. On top of that, a drop-in compatibility layer lets any app built for OpenAI, Anthropic, Ollama or OpenAI Responses talk to it unchanged.",
|
"subtitle": "LocalAI serves its own full API — image & video generation, depth, object detection, reranking, audio, face & voice recognition, and realtime voice over WebRTC and WebSocket. On top of that, a drop-in compatibility layer lets any app built for OpenAI, Anthropic, Ollama or OpenAI Responses talk to it unchanged.",
|
||||||
|
|||||||
@@ -2,6 +2,16 @@
|
|||||||
"title": "Install Models",
|
"title": "Install Models",
|
||||||
"subtitle": "Browse and install AI models from the gallery",
|
"subtitle": "Browse and install AI models from the gallery",
|
||||||
"models": "Models",
|
"models": "Models",
|
||||||
|
"recommended": {
|
||||||
|
"title": "Recommended for your hardware",
|
||||||
|
"cpuNote": "No GPU detected - small models that stay responsive on CPU.",
|
||||||
|
"gpuNote": "Sized to fit your available VRAM with room for context.",
|
||||||
|
"install": "Install",
|
||||||
|
"installing": "Installing",
|
||||||
|
"installStarted": "Installing {{model}}…",
|
||||||
|
"installFailed": "Install failed: {{message}}",
|
||||||
|
"dismiss": "Dismiss recommendations"
|
||||||
|
},
|
||||||
"stats": {
|
"stats": {
|
||||||
"available": "Available",
|
"available": "Available",
|
||||||
"installed": "Installed"
|
"installed": "Installed"
|
||||||
|
|||||||
@@ -6363,6 +6363,130 @@ select.input {
|
|||||||
justify-content: center;
|
justify-content: center;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ──────────────────── Home: hardware-aware starter models ──────────────────── */
|
||||||
|
|
||||||
|
.home-starters {
|
||||||
|
margin: var(--spacing-lg) 0;
|
||||||
|
padding: var(--spacing-lg);
|
||||||
|
}
|
||||||
|
.home-starters-head {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
gap: var(--spacing-md);
|
||||||
|
}
|
||||||
|
.home-starters-head strong {
|
||||||
|
font-size: 0.9375rem;
|
||||||
|
}
|
||||||
|
.home-starters-tier {
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: var(--spacing-xs);
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--color-text-muted);
|
||||||
|
}
|
||||||
|
.home-starters-sub {
|
||||||
|
margin: var(--spacing-xs) 0 var(--spacing-md);
|
||||||
|
font-size: 0.8125rem;
|
||||||
|
color: var(--color-text-secondary);
|
||||||
|
}
|
||||||
|
.home-starters-list {
|
||||||
|
list-style: none;
|
||||||
|
margin: 0;
|
||||||
|
padding: 0;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: var(--spacing-xs);
|
||||||
|
}
|
||||||
|
.home-starters-item {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: var(--spacing-md);
|
||||||
|
padding: var(--spacing-xs) 0;
|
||||||
|
}
|
||||||
|
.home-starters-name {
|
||||||
|
font-weight: 500;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
word-break: break-all;
|
||||||
|
}
|
||||||
|
.home-starters-badge {
|
||||||
|
font-size: 0.625rem;
|
||||||
|
}
|
||||||
|
.home-starters-size {
|
||||||
|
margin-left: auto;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--color-text-muted);
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ──────────────────── Models gallery: recommended-for-your-hardware strip ──────────────────── */
|
||||||
|
|
||||||
|
.rec-models {
|
||||||
|
margin-bottom: var(--spacing-md);
|
||||||
|
padding: var(--spacing-md) var(--spacing-lg);
|
||||||
|
}
|
||||||
|
.rec-models-head {
|
||||||
|
display: flex;
|
||||||
|
align-items: flex-start;
|
||||||
|
justify-content: space-between;
|
||||||
|
gap: var(--spacing-md);
|
||||||
|
}
|
||||||
|
.rec-models-title {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: var(--spacing-sm);
|
||||||
|
flex-wrap: wrap;
|
||||||
|
}
|
||||||
|
.rec-models-title i {
|
||||||
|
color: var(--color-primary);
|
||||||
|
}
|
||||||
|
.rec-models-note {
|
||||||
|
font-size: 0.8125rem;
|
||||||
|
color: var(--color-text-secondary);
|
||||||
|
}
|
||||||
|
.rec-models-dismiss {
|
||||||
|
background: none;
|
||||||
|
border: none;
|
||||||
|
color: var(--color-text-muted);
|
||||||
|
cursor: pointer;
|
||||||
|
padding: 4px;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
.rec-models-dismiss:hover {
|
||||||
|
color: var(--color-text-primary);
|
||||||
|
}
|
||||||
|
.rec-models-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));
|
||||||
|
gap: var(--spacing-sm);
|
||||||
|
margin-top: var(--spacing-md);
|
||||||
|
}
|
||||||
|
.rec-models-item {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: var(--spacing-xs);
|
||||||
|
padding: var(--spacing-sm) var(--spacing-md);
|
||||||
|
border: 1px solid var(--color-border-subtle);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
background: var(--color-bg-primary);
|
||||||
|
}
|
||||||
|
.rec-models-item-name {
|
||||||
|
font-weight: 500;
|
||||||
|
font-size: 0.8125rem;
|
||||||
|
word-break: break-all;
|
||||||
|
}
|
||||||
|
.rec-models-item-meta {
|
||||||
|
display: flex;
|
||||||
|
gap: var(--spacing-sm);
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--color-text-muted);
|
||||||
|
}
|
||||||
|
.rec-models-item-fit {
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
/* ──────────────────── Home: drop-in endpoint / API compatibility ──────────────────── */
|
/* ──────────────────── Home: drop-in endpoint / API compatibility ──────────────────── */
|
||||||
|
|
||||||
.home-connect {
|
.home-connect {
|
||||||
|
|||||||
@@ -1,8 +1,25 @@
|
|||||||
import { useEffect, useMemo } from 'react'
|
import { useEffect, useMemo, useCallback } from 'react'
|
||||||
import { useModels } from '../hooks/useModels'
|
import { useModels } from '../hooks/useModels'
|
||||||
import SearchableSelect from './SearchableSelect'
|
import SearchableSelect from './SearchableSelect'
|
||||||
import { useTranslation } from 'react-i18next'
|
import { useTranslation } from 'react-i18next'
|
||||||
|
|
||||||
|
// Remember the last model the user picked, keyed by capability, so returning to
|
||||||
|
// a page (Home chat box, Image, TTS, Talk...) defaults to that model instead of
|
||||||
|
// whatever happens to sort first. Only persisted when a capability key exists —
|
||||||
|
// `externalOptions` callers pass no capability and get the old first-item
|
||||||
|
// behaviour. localStorage access is wrapped because private-browsing modes throw.
|
||||||
|
const LAST_MODEL_PREFIX = 'localai_last_model:'
|
||||||
|
|
||||||
|
function readLastModel(capability) {
|
||||||
|
if (!capability) return null
|
||||||
|
try { return localStorage.getItem(LAST_MODEL_PREFIX + capability) } catch { return null }
|
||||||
|
}
|
||||||
|
|
||||||
|
function writeLastModel(capability, model) {
|
||||||
|
if (!capability || !model) return
|
||||||
|
try { localStorage.setItem(LAST_MODEL_PREFIX + capability, model) } catch { /* ignore */ }
|
||||||
|
}
|
||||||
|
|
||||||
export default function ModelSelector({
|
export default function ModelSelector({
|
||||||
value, onChange, capability, className = '',
|
value, onChange, capability, className = '',
|
||||||
options: externalOptions, loading: externalLoading,
|
options: externalOptions, loading: externalLoading,
|
||||||
@@ -19,16 +36,27 @@ export default function ModelSelector({
|
|||||||
const isLoading = externalOptions ? (externalLoading || false) : hookLoading
|
const isLoading = externalOptions ? (externalLoading || false) : hookLoading
|
||||||
const isDisabled = isLoading || (externalDisabled || false)
|
const isDisabled = isLoading || (externalDisabled || false)
|
||||||
|
|
||||||
|
// Persist genuine selections so the next visit can restore them.
|
||||||
|
const handleChange = useCallback((next) => {
|
||||||
|
writeLastModel(capability, next)
|
||||||
|
onChange(next)
|
||||||
|
}, [capability, onChange])
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (modelNames.length > 0 && (!value || !modelNames.includes(value))) {
|
if (modelNames.length > 0 && (!value || !modelNames.includes(value))) {
|
||||||
onChange(modelNames[0])
|
// Prefer the remembered model when it's still available; otherwise fall
|
||||||
|
// back to the first option. Don't re-persist here — auto-select is not a
|
||||||
|
// user choice, and writing back the stored value would be a harmless but
|
||||||
|
// pointless round-trip.
|
||||||
|
const remembered = readLastModel(capability)
|
||||||
|
onChange(remembered && modelNames.includes(remembered) ? remembered : modelNames[0])
|
||||||
}
|
}
|
||||||
}, [modelNames, value, onChange])
|
}, [modelNames, value, onChange, capability])
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<SearchableSelect
|
<SearchableSelect
|
||||||
value={value || ''}
|
value={value || ''}
|
||||||
onChange={onChange}
|
onChange={handleChange}
|
||||||
options={modelNames}
|
options={modelNames}
|
||||||
placeholder={isLoading ? t('selector.loading') : (modelNames.length === 0 ? t('selector.noModels') : t('selector.selectModel'))}
|
placeholder={isLoading ? t('selector.loading') : (modelNames.length === 0 ? t('selector.noModels') : t('selector.selectModel'))}
|
||||||
searchPlaceholder={searchPlaceholder || t('selector.searchPlaceholder')}
|
searchPlaceholder={searchPlaceholder || t('selector.searchPlaceholder')}
|
||||||
|
|||||||
86
core/http/react-ui/src/components/RecommendedModels.jsx
Normal file
86
core/http/react-ui/src/components/RecommendedModels.jsx
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
import { useState } from 'react'
|
||||||
|
import { useTranslation } from 'react-i18next'
|
||||||
|
import { modelsApi } from '../utils/api'
|
||||||
|
import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels'
|
||||||
|
|
||||||
|
const DISMISS_KEY = 'localai_rec_models_dismissed'
|
||||||
|
|
||||||
|
// "Recommended for your hardware" strip at the top of the Models gallery. Shares
|
||||||
|
// the hardware-fit ranking with the empty-state starter widget via
|
||||||
|
// useRecommendedModels, but styled for the gallery page and dismissible (the
|
||||||
|
// gallery is a repeat-visit surface, so it shouldn't nag).
|
||||||
|
export default function RecommendedModels({ addToast }) {
|
||||||
|
const { t } = useTranslation('models')
|
||||||
|
const { recommended, tier, loading } = useRecommendedModels({ count: 4 })
|
||||||
|
const [installing, setInstalling] = useState(() => new Set())
|
||||||
|
const [dismissed, setDismissed] = useState(() => {
|
||||||
|
try { return localStorage.getItem(DISMISS_KEY) === '1' } catch { return false }
|
||||||
|
})
|
||||||
|
|
||||||
|
if (loading || dismissed) return null
|
||||||
|
if (!recommended || recommended.length === 0) return null
|
||||||
|
|
||||||
|
const dismiss = () => {
|
||||||
|
try { localStorage.setItem(DISMISS_KEY, '1') } catch { /* ignore */ }
|
||||||
|
setDismissed(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
const install = async (name) => {
|
||||||
|
setInstalling(prev => new Set(prev).add(name))
|
||||||
|
try {
|
||||||
|
await modelsApi.install(name)
|
||||||
|
addToast?.(t('recommended.installStarted', { model: name }), 'success')
|
||||||
|
} catch (err) {
|
||||||
|
addToast?.(t('recommended.installFailed', { message: err.message }), 'error')
|
||||||
|
setInstalling(prev => {
|
||||||
|
const next = new Set(prev)
|
||||||
|
next.delete(name)
|
||||||
|
return next
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const isGpu = tier.id !== 'cpu'
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="rec-models card">
|
||||||
|
<div className="rec-models-head">
|
||||||
|
<div className="rec-models-title">
|
||||||
|
<i className={`fas ${isGpu ? 'fa-microchip' : 'fa-memory'}`} aria-hidden="true" />
|
||||||
|
<strong>{t('recommended.title')}</strong>
|
||||||
|
<span className="rec-models-note">{isGpu ? t('recommended.gpuNote') : t('recommended.cpuNote')}</span>
|
||||||
|
</div>
|
||||||
|
<button type="button" className="rec-models-dismiss" onClick={dismiss} aria-label={t('recommended.dismiss')} title={t('recommended.dismiss')}>
|
||||||
|
<i className="fas fa-times" aria-hidden="true" />
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<div className="rec-models-grid">
|
||||||
|
{recommended.map(m => {
|
||||||
|
const busy = installing.has(m.name)
|
||||||
|
return (
|
||||||
|
<div key={m.name} className="rec-models-item">
|
||||||
|
<div className="rec-models-item-name">{m.name}</div>
|
||||||
|
<div className="rec-models-item-meta">
|
||||||
|
{isNvfp4Name(m.name) && <span className="badge badge-info">NVFP4</span>}
|
||||||
|
{m.sizeDisplay && <span>{m.sizeDisplay}</span>}
|
||||||
|
{isGpu && m.vramDisplay && (
|
||||||
|
<span className="rec-models-item-fit"><i className="fas fa-microchip" aria-hidden="true" /> {m.vramDisplay}</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
className="btn btn-primary btn-sm"
|
||||||
|
disabled={busy}
|
||||||
|
onClick={() => install(m.name)}
|
||||||
|
>
|
||||||
|
{busy
|
||||||
|
? (<><i className="fas fa-spinner fa-spin" aria-hidden="true" /> {t('recommended.installing')}</>)
|
||||||
|
: (<><i className="fas fa-download" aria-hidden="true" /> {t('recommended.install')}</>)}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
129
core/http/react-ui/src/components/StarterModels.jsx
Normal file
129
core/http/react-ui/src/components/StarterModels.jsx
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
import { useState } from 'react'
|
||||||
|
import { useTranslation } from 'react-i18next'
|
||||||
|
import { modelsApi } from '../utils/api'
|
||||||
|
import { useRecommendedModels, isNvfp4Name } from '../hooks/useRecommendedModels'
|
||||||
|
|
||||||
|
// Static fallback used only when the live gallery / estimates can't be reached
|
||||||
|
// (offline, trimmed gallery). The hook is the primary, data-driven path; these
|
||||||
|
// are real gallery names kept as a safety net so onboarding never shows nothing.
|
||||||
|
// Gemma picks use the QAT (quantization-aware-trained) Q4 builds. NVIDIA boxes
|
||||||
|
// get NVFP4 + MTP variants at the mid/large tiers (see NVIDIA below).
|
||||||
|
const BASE = {
|
||||||
|
cpu: [
|
||||||
|
{ name: 'gemma-4-e2b-it-qat-q4_0', size: '~1.5 GB' },
|
||||||
|
{ name: 'qwen3.5-4b-claude-4.6-opus-reasoning-distilled', size: '~2.5 GB' },
|
||||||
|
{ name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' },
|
||||||
|
{ name: 'lfm2.5-1.2b-instruct', size: '~0.8 GB' },
|
||||||
|
],
|
||||||
|
'gpu-small': [
|
||||||
|
{ name: 'gemma-4-e4b-it-qat-q4_0', size: '~3 GB' },
|
||||||
|
{ name: 'lfm2.5-8b-a1b', size: '~5 GB' },
|
||||||
|
{ name: 'qwen3.5-9b', size: '~5.5 GB' },
|
||||||
|
{ name: 'gemma-4-12b-it-qat-q4_0', size: '~7 GB' },
|
||||||
|
],
|
||||||
|
'gpu-mid': [
|
||||||
|
{ name: 'qwen3.6-27b', size: '~16 GB' },
|
||||||
|
{ name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' },
|
||||||
|
{ name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' },
|
||||||
|
{ name: 'qwen3.5-27b', size: '~16 GB' },
|
||||||
|
],
|
||||||
|
'gpu-large': [
|
||||||
|
{ name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' },
|
||||||
|
{ name: 'qwen3.6-35b-a3b-claude-4.6-opus-reasoning-distilled', size: '~20 GB' },
|
||||||
|
{ name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' },
|
||||||
|
{ name: 'qwen3.5-35b-a3b-apex', size: '~20 GB' },
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
// NVIDIA-only overrides: NVFP4 is a Blackwell-optimised 4-bit format paired with
|
||||||
|
// MTP (multi-token prediction) for speed. Only the mid/large tiers have these.
|
||||||
|
const NVIDIA = {
|
||||||
|
'gpu-mid': [
|
||||||
|
{ name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' },
|
||||||
|
{ name: 'qwen3.6-27b-mtp-pi-tune', size: '~16 GB' },
|
||||||
|
{ name: 'gemma-4-26b-a4b-it-qat-q4_0', size: '~16 GB' },
|
||||||
|
{ name: 'qwen3.6-27b', size: '~16 GB' },
|
||||||
|
],
|
||||||
|
'gpu-large': [
|
||||||
|
{ name: 'qwen3.6-35b-a3b-nvfp4-mtp', size: '~18 GB' },
|
||||||
|
{ name: 'qwen3.6-27b-nvfp4-mtp', size: '~14 GB' },
|
||||||
|
{ name: 'qwen3.6-35b-a3b-apex', size: '~20 GB' },
|
||||||
|
{ name: 'gemma-4-31b-it-qat-q4_0', size: '~18 GB' },
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
function fallbackFor(tierId, isNvidia) {
|
||||||
|
if (isNvidia && NVIDIA[tierId]) return NVIDIA[tierId]
|
||||||
|
return BASE[tierId] || BASE.cpu
|
||||||
|
}
|
||||||
|
|
||||||
|
export default function StarterModels({ addToast, onInstallStarted }) {
|
||||||
|
const { t } = useTranslation('home')
|
||||||
|
const { recommended, tier, isNvidia, loading } = useRecommendedModels({ count: 4 })
|
||||||
|
const [installing, setInstalling] = useState(() => new Set())
|
||||||
|
|
||||||
|
// While the hardware probe + gallery query are in flight, render nothing
|
||||||
|
// rather than flashing fallback content that may be replaced a moment later.
|
||||||
|
if (loading) return null
|
||||||
|
|
||||||
|
// Prefer live recommendations; fall back to the static list only when the
|
||||||
|
// gallery yielded nothing.
|
||||||
|
const items = (recommended && recommended.length > 0)
|
||||||
|
? recommended.map(r => ({ name: r.name, size: r.sizeDisplay }))
|
||||||
|
: fallbackFor(tier.id, isNvidia)
|
||||||
|
|
||||||
|
if (items.length === 0) return null
|
||||||
|
|
||||||
|
const install = async (name) => {
|
||||||
|
setInstalling(prev => new Set(prev).add(name))
|
||||||
|
try {
|
||||||
|
await modelsApi.install(name)
|
||||||
|
addToast?.(t('starters.installStarted', { model: name }), 'success')
|
||||||
|
onInstallStarted?.(name)
|
||||||
|
} catch (err) {
|
||||||
|
addToast?.(t('starters.installFailed', { message: err.message }), 'error')
|
||||||
|
setInstalling(prev => {
|
||||||
|
const next = new Set(prev)
|
||||||
|
next.delete(name)
|
||||||
|
return next
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<section className="home-starters card">
|
||||||
|
<div className="home-starters-head">
|
||||||
|
<strong>{t('starters.title')}</strong>
|
||||||
|
<span className="home-starters-tier">
|
||||||
|
<i className={`fas ${tier.id === 'cpu' ? 'fa-memory' : 'fa-microchip'}`} aria-hidden="true" />
|
||||||
|
{t(`starters.tier.${tier.id}`)}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<p className="home-starters-sub">
|
||||||
|
{tier.id === 'cpu' ? t('starters.cpuNote') : t('starters.gpuNote')}
|
||||||
|
</p>
|
||||||
|
<ul className="home-starters-list">
|
||||||
|
{items.map(c => {
|
||||||
|
const busy = installing.has(c.name)
|
||||||
|
return (
|
||||||
|
<li key={c.name} className="home-starters-item">
|
||||||
|
<span className="home-starters-name">{c.name}</span>
|
||||||
|
{isNvfp4Name(c.name) && <span className="badge badge-info home-starters-badge">NVFP4</span>}
|
||||||
|
{c.size && <span className="home-starters-size">{c.size}</span>}
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
className="btn btn-primary btn-sm"
|
||||||
|
disabled={busy}
|
||||||
|
onClick={() => install(c.name)}
|
||||||
|
>
|
||||||
|
{busy
|
||||||
|
? (<><i className="fas fa-spinner fa-spin" aria-hidden="true" /> {t('starters.installing')}</>)
|
||||||
|
: (<><i className="fas fa-download" aria-hidden="true" /> {t('starters.install')}</>)}
|
||||||
|
</button>
|
||||||
|
</li>
|
||||||
|
)
|
||||||
|
})}
|
||||||
|
</ul>
|
||||||
|
</section>
|
||||||
|
)
|
||||||
|
}
|
||||||
66
core/http/react-ui/src/hooks/usePolling.js
vendored
Normal file
66
core/http/react-ui/src/hooks/usePolling.js
vendored
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
import { useEffect, useRef, useCallback } from 'react'
|
||||||
|
|
||||||
|
// usePolling runs `fn` immediately and then on a fixed interval, with two
|
||||||
|
// behaviours every hand-rolled setInterval in this app was missing:
|
||||||
|
//
|
||||||
|
// 1. Visibility-aware: the timer pauses while the tab is hidden
|
||||||
|
// (document.hidden) and fires an immediate catch-up poll when the tab
|
||||||
|
// becomes visible again. A backgrounded dashboard no longer hammers the
|
||||||
|
// server every few seconds for data nobody is looking at.
|
||||||
|
// 2. Non-overlapping: if `fn` returns a promise that takes longer than the
|
||||||
|
// interval, the next tick waits for it instead of stacking requests.
|
||||||
|
//
|
||||||
|
// `enabled: false` stops polling entirely (one-shot or gated polls). The
|
||||||
|
// returned `refetch` runs `fn` on demand and is stable across renders.
|
||||||
|
/**
 * Poll `fn` on a fixed interval while the document is visible.
 *
 * @param {Function} fn - Poll callback; may be sync or return a promise.
 * @param {number} [intervalMs=5000] - Delay between timer ticks, in ms.
 * @param {object} [options]
 * @param {boolean} [options.enabled=true] - When false, no timer or listener is installed.
 * @param {boolean} [options.immediate=true] - Run one poll as soon as the effect starts.
 * @returns {{ refetch: Function }} `refetch` runs `fn` on demand; it resolves to
 *   `undefined` without calling `fn` when a previous run is still in flight,
 *   and rejects if `fn` rejects.
 */
export function usePolling(fn, intervalMs = 5000, { enabled = true, immediate = true } = {}) {
  // Keep the latest callback in a ref so `refetch` can stay referentially
  // stable and a new `fn` identity does not restart the timer.
  const fnRef = useRef(fn)
  fnRef.current = fn

  const runningRef = useRef(false)
  const refetch = useCallback(async () => {
    // Guard against overlap: a slow poll shouldn't pile up behind a fast timer.
    if (runningRef.current) return
    runningRef.current = true
    try {
      return await fnRef.current()
    } finally {
      runningRef.current = false
    }
  }, [])

  useEffect(() => {
    if (!enabled) return
    let timer = null

    // Timer- and visibility-driven polls have no caller that could observe a
    // failure, so a rejecting `fn` would otherwise become an unhandled promise
    // rejection on every tick. Report it and keep the cadence going.
    const tick = () => {
      refetch().catch((err) => {
        console.error('usePolling: poll failed', err)
      })
    }

    const start = () => {
      if (timer != null) return
      timer = setInterval(tick, intervalMs)
    }
    const stop = () => {
      if (timer != null) { clearInterval(timer); timer = null }
    }

    const onVisibility = () => {
      if (document.hidden) {
        stop()
      } else {
        // Catch up immediately on return, then resume the cadence.
        tick()
        start()
      }
    }

    if (immediate) tick()
    // A tab that mounts while hidden only starts its timer once it becomes visible.
    if (!document.hidden) start()
    document.addEventListener('visibilitychange', onVisibility)

    return () => {
      stop()
      document.removeEventListener('visibilitychange', onVisibility)
    }
  }, [enabled, intervalMs, immediate, refetch])

  return { refetch }
}
|
||||||
108
core/http/react-ui/src/hooks/useRecommendedModels.js
vendored
Normal file
108
core/http/react-ui/src/hooks/useRecommendedModels.js
vendored
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
import { useState, useEffect } from 'react'
|
||||||
|
import { modelsApi } from '../utils/api'
|
||||||
|
import { useResources } from './useResources'
|
||||||
|
|
||||||
|
// Data-driven "recommended for your hardware" model picks. The gallery exposes
|
||||||
|
// no popularity/download signal and the list response carries no size, so we:
|
||||||
|
// 1. ask the server for chat-capable models in their natural (curated) order,
|
||||||
|
// 2. estimate size/VRAM for the top candidates (same endpoint the Models page
|
||||||
|
// uses), and
|
||||||
|
// 3. rank by hardware fit — smallest on CPU-only boxes, largest-that-fits on
|
||||||
|
// GPUs (bigger == better quality while still fitting VRAM).
|
||||||
|
//
|
||||||
|
// Returns `recommended === null` while loading, `[]` when nothing could be
|
||||||
|
// resolved (gallery/estimates unavailable) so callers can fall back.
|
||||||
|
|
||||||
|
const GB = 1024 * 1024 * 1024
|
||||||
|
const DEFAULT_CTX = 4096
|
||||||
|
|
||||||
|
// NVFP4 is a Blackwell/NVIDIA-specific 4-bit format — only worth suggesting on
|
||||||
|
// NVIDIA hardware, and to be filtered out elsewhere.
|
||||||
|
/** True when the model name contains "nvfp4" (case-insensitive); falsy names never match. */
export const isNvfp4Name = (name) => {
  const label = name || ''
  return /nvfp4/i.test(label)
}
|
||||||
|
|
||||||
|
/**
 * Report whether any entry of `resources.gpus` has a vendor equal to
 * "nvidia" (compared case-insensitively). Returns false when `gpus` is
 * missing or not an array.
 */
export function hasNvidiaGpu(resources) {
  const gpus = resources?.gpus
  if (!Array.isArray(gpus)) return false

  const isNvidia = (gpu) => {
    const vendor = gpu?.vendor || ''
    return vendor.toLowerCase() === 'nvidia'
  }
  return gpus.some(isNvidia)
}
|
||||||
|
|
||||||
|
/**
 * Bucket the reported hardware into a recommendation tier.
 *
 * Returns `{ id, vram }` where `id` is 'cpu' (not a GPU box, or no usable
 * total memory reported), 'gpu-small' (< 8 GiB), 'gpu-mid' (< 24 GiB) or
 * 'gpu-large'. `vram` is 0 for the cpu tier and `aggregate.total_memory`
 * otherwise.
 */
export function recommendTier(resources) {
  const totalVram = resources?.aggregate?.total_memory || 0

  if (resources?.type !== 'gpu' || totalVram <= 0) {
    return { id: 'cpu', vram: 0 }
  }

  let id = 'gpu-large'
  if (totalVram < 8 * GB) {
    id = 'gpu-small'
  } else if (totalVram < 24 * GB) {
    id = 'gpu-mid'
  }
  return { id, vram: totalVram }
}
|
||||||
|
|
||||||
|
/**
 * Pick up to `count` candidates for the given hardware tier.
 *
 * Candidates without a known `sizeBytes` are dropped, as are NVFP4-named
 * models when `isNvidia` is false. On the cpu tier the smallest models win.
 * On GPU tiers, models whose `vramBytes` is within 95% of `tier.vram` are
 * ordered largest-first; if none fit, the whole pool is ordered
 * smallest-first. When `isNvidia` is true, NVFP4-named models sort ahead of
 * everything else on GPU tiers.
 */
function rank(candidates, tier, count, isNvidia) {
  const eligible = candidates.filter((c) => {
    if (c.sizeBytes == null) return false
    return isNvidia || !isNvfp4Name(c.name)
  })

  const smallestFirst = (a, b) => a.sizeBytes - b.sizeBytes
  const largestFirst = (a, b) => b.sizeBytes - a.sizeBytes

  // No GPU: smallest models stay responsive on CPU.
  if (tier.id === 'cpu') {
    return eligible.slice().sort(smallestFirst).slice(0, count)
  }

  const budget = tier.vram * 0.95
  const fitting = eligible.filter(c => c.vramBytes != null && c.vramBytes <= budget)
  const anyFits = fitting.length > 0

  // Tiny GPU where nothing fits: fall back to the whole pool, smallest first.
  const chosen = anyFits ? fitting : eligible
  const bySize = anyFits ? largestFirst : smallestFirst

  const compare = (a, b) => {
    if (isNvidia) {
      // NVFP4 builds are surfaced first on NVIDIA hardware.
      const aNvfp4 = isNvfp4Name(a.name)
      const bNvfp4 = isNvfp4Name(b.name)
      if (aNvfp4 !== bNvfp4) return aNvfp4 ? -1 : 1
    }
    return bySize(a, b)
  }

  return chosen.slice().sort(compare).slice(0, count)
}
|
||||||
|
|
||||||
|
/**
 * Hook returning hardware-aware model recommendations.
 *
 * Once resources are known it lists up to `candidatePool` chat-tagged gallery
 * models, drops the installed ones, estimates size/VRAM for each at
 * DEFAULT_CTX, and ranks them down to `count` entries.
 *
 * Returns `{ recommended, tier, isNvidia, error, loading }`; `recommended` is
 * `null` while loading and `[]` when the listing request failed.
 */
export function useRecommendedModels({ count = 4, candidatePool = 10 } = {}) {
  const { resources } = useResources()
  const [recommended, setRecommended] = useState(null)
  const [error, setError] = useState(null)

  const resReady = resources !== null
  const tier = recommendTier(resources)
  const isNvidia = hasNvidiaGpu(resources)

  useEffect(() => {
    if (!resReady) return

    // Flipped by the cleanup so a superseded run never writes state.
    let cancelled = false
    setRecommended(null)
    setError(null)

    // Estimate a single model; a failed estimate yields an entry without a
    // size instead of failing the whole batch.
    const estimateOne = async (model) => {
      const name = model.name || model.id
      try {
        const estimate = await modelsApi.estimate(name, [DEFAULT_CTX])
        const atCtx = estimate?.estimates?.[String(DEFAULT_CTX)]
        return {
          name,
          description: model.description,
          sizeBytes: estimate?.sizeBytes ?? null,
          sizeDisplay: estimate?.sizeDisplay ?? null,
          vramBytes: atCtx?.vramBytes ?? null,
          vramDisplay: atCtx?.vramDisplay ?? null,
        }
      } catch {
        return { name, sizeBytes: null }
      }
    }

    const load = async () => {
      try {
        const data = await modelsApi.list({ tag: 'chat', items: candidatePool, page: 1 })
        // Recommend models the user hasn't installed yet.
        const notInstalled = (data?.models || []).filter(m => !m.installed)
        const estimated = await Promise.all(notInstalled.map(m => estimateOne(m)))
        if (cancelled) return
        setRecommended(rank(estimated, tier, count, isNvidia))
      } catch (e) {
        if (cancelled) return
        setError(e.message)
        setRecommended([])
      }
    }
    load()

    return () => { cancelled = true }
    // tier.id / tier.vram / isNvidia are primitives, so resource polling doesn't re-run this.
  }, [resReady, tier.id, tier.vram, isNvidia, count, candidatePool])

  return { recommended, tier, isNvidia, error, loading: recommended === null }
}
|
||||||
17
core/http/react-ui/src/hooks/useResources.js
vendored
17
core/http/react-ui/src/hooks/useResources.js
vendored
@@ -1,11 +1,11 @@
|
|||||||
import { useState, useEffect, useCallback, useRef } from 'react'
|
import { useState, useCallback } from 'react'
|
||||||
import { resourcesApi } from '../utils/api'
|
import { resourcesApi } from '../utils/api'
|
||||||
|
import { usePolling } from './usePolling'
|
||||||
|
|
||||||
export function useResources(pollInterval = 5000) {
|
export function useResources(pollInterval = 5000) {
|
||||||
const [resources, setResources] = useState(null)
|
const [resources, setResources] = useState(null)
|
||||||
const [loading, setLoading] = useState(true)
|
const [loading, setLoading] = useState(true)
|
||||||
const [error, setError] = useState(null)
|
const [error, setError] = useState(null)
|
||||||
const intervalRef = useRef(null)
|
|
||||||
|
|
||||||
const fetchResources = useCallback(async () => {
|
const fetchResources = useCallback(async () => {
|
||||||
try {
|
try {
|
||||||
@@ -19,13 +19,10 @@ export function useResources(pollInterval = 5000) {
|
|||||||
}
|
}
|
||||||
}, [])
|
}, [])
|
||||||
|
|
||||||
useEffect(() => {
|
// Visibility-aware polling: pauses while the tab is hidden and catches up on
|
||||||
fetchResources()
|
// return (see usePolling). Resource stats are pure dashboard data, so there's
|
||||||
intervalRef.current = setInterval(fetchResources, pollInterval)
|
// no reason to keep fetching them for a backgrounded tab.
|
||||||
return () => {
|
const { refetch } = usePolling(fetchResources, pollInterval)
|
||||||
if (intervalRef.current) clearInterval(intervalRef.current)
|
|
||||||
}
|
|
||||||
}, [fetchResources, pollInterval])
|
|
||||||
|
|
||||||
return { resources, loading, error, refetch: fetchResources }
|
return { resources, loading, error, refetch }
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -765,8 +765,10 @@ export default function AgentChat() {
|
|||||||
className="chat-send-btn"
|
className="chat-send-btn"
|
||||||
onClick={handleSend}
|
onClick={handleSend}
|
||||||
disabled={processing || !input.trim()}
|
disabled={processing || !input.trim()}
|
||||||
|
aria-label="Send message"
|
||||||
|
title="Send message"
|
||||||
>
|
>
|
||||||
<i className="fas fa-paper-plane" />
|
<i className="fas fa-paper-plane" aria-hidden="true" />
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -1427,8 +1427,10 @@ export default function Chat() {
|
|||||||
className="chat-send-btn"
|
className="chat-send-btn"
|
||||||
onClick={handleSend}
|
onClick={handleSend}
|
||||||
disabled={!input.trim() && files.length === 0}
|
disabled={!input.trim() && files.length === 0}
|
||||||
|
aria-label={t('input.send')}
|
||||||
|
title={t('input.send')}
|
||||||
>
|
>
|
||||||
<i className="fas fa-paper-plane" />
|
<i className="fas fa-paper-plane" aria-hidden="true" />
|
||||||
</button>
|
</button>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import UnifiedMCPDropdown from '../components/UnifiedMCPDropdown'
|
|||||||
import ConfirmDialog from '../components/ConfirmDialog'
|
import ConfirmDialog from '../components/ConfirmDialog'
|
||||||
import HomeConnect from '../components/HomeConnect'
|
import HomeConnect from '../components/HomeConnect'
|
||||||
import { useResources } from '../hooks/useResources'
|
import { useResources } from '../hooks/useResources'
|
||||||
|
import { usePolling } from '../hooks/usePolling'
|
||||||
import { fileToBase64, backendControlApi, systemApi, modelsApi, mcpApi, nodesApi } from '../utils/api'
|
import { fileToBase64, backendControlApi, systemApi, modelsApi, mcpApi, nodesApi } from '../utils/api'
|
||||||
import { API_CONFIG } from '../utils/config'
|
import { API_CONFIG } from '../utils/config'
|
||||||
import { greetingKey } from '../utils/greeting'
|
import { greetingKey } from '../utils/greeting'
|
||||||
@@ -17,6 +18,7 @@ import StatusPill from '../components/StatusPill'
|
|||||||
import Skeleton from '../components/Skeleton'
|
import Skeleton from '../components/Skeleton'
|
||||||
import SectionHeading from '../components/SectionHeading'
|
import SectionHeading from '../components/SectionHeading'
|
||||||
import EmptyState from '../components/EmptyState'
|
import EmptyState from '../components/EmptyState'
|
||||||
|
import StarterModels from '../components/StarterModels'
|
||||||
import { staggerStyle } from '../hooks/useStagger'
|
import { staggerStyle } from '../hooks/useStagger'
|
||||||
|
|
||||||
export default function Home() {
|
export default function Home() {
|
||||||
@@ -68,10 +70,9 @@ export default function Home() {
|
|||||||
.catch(() => {})
|
.catch(() => {})
|
||||||
}, [])
|
}, [])
|
||||||
|
|
||||||
// Poll cluster node data in distributed mode
|
// Poll cluster node data in distributed mode. Visibility-aware + gated on
|
||||||
useEffect(() => {
|
// distributedMode so a non-distributed or backgrounded tab makes no calls.
|
||||||
if (!distributedMode) return
|
const fetchCluster = useCallback(async () => {
|
||||||
const fetchCluster = async () => {
|
|
||||||
try {
|
try {
|
||||||
const data = await nodesApi.list()
|
const data = await nodesApi.list()
|
||||||
const nodes = Array.isArray(data) ? data : []
|
const nodes = Array.isArray(data) ? data : []
|
||||||
@@ -97,11 +98,8 @@ export default function Home() {
|
|||||||
totalCount,
|
totalCount,
|
||||||
})
|
})
|
||||||
} catch { setClusterData(null) }
|
} catch { setClusterData(null) }
|
||||||
}
|
}, [])
|
||||||
fetchCluster()
|
usePolling(fetchCluster, 5000, { enabled: distributedMode })
|
||||||
const interval = setInterval(fetchCluster, 5000)
|
|
||||||
return () => clearInterval(interval)
|
|
||||||
}, [distributedMode])
|
|
||||||
|
|
||||||
// Fetch configured models (to know if any exist) and loaded models (currently running)
|
// Fetch configured models (to know if any exist) and loaded models (currently running)
|
||||||
const fetchSystemInfo = useCallback(async () => {
|
const fetchSystemInfo = useCallback(async () => {
|
||||||
@@ -123,11 +121,7 @@ export default function Home() {
|
|||||||
}
|
}
|
||||||
}, [])
|
}, [])
|
||||||
|
|
||||||
useEffect(() => {
|
usePolling(fetchSystemInfo, 5000)
|
||||||
fetchSystemInfo()
|
|
||||||
const interval = setInterval(fetchSystemInfo, 5000)
|
|
||||||
return () => clearInterval(interval)
|
|
||||||
}, [fetchSystemInfo])
|
|
||||||
|
|
||||||
// Check MCP availability when selected model changes
|
// Check MCP availability when selected model changes
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
@@ -523,6 +517,8 @@ export default function Home() {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<StarterModels addToast={addToast} onInstallStarted={fetchSystemInfo} />
|
||||||
|
|
||||||
<div className="home-wizard-actions">
|
<div className="home-wizard-actions">
|
||||||
<button className="btn btn-primary" onClick={() => navigate('/app/models')}>
|
<button className="btn btn-primary" onClick={() => navigate('/app/models')}>
|
||||||
<i className="fas fa-store" /> {t('wizard.browseGallery')}
|
<i className="fas fa-store" /> {t('wizard.browseGallery')}
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import ConfirmDialog from '../components/ConfirmDialog'
|
|||||||
import GalleryLoader from '../components/GalleryLoader'
|
import GalleryLoader from '../components/GalleryLoader'
|
||||||
import Toggle from '../components/Toggle'
|
import Toggle from '../components/Toggle'
|
||||||
import ResponsiveTable from '../components/ResponsiveTable'
|
import ResponsiveTable from '../components/ResponsiveTable'
|
||||||
|
import RecommendedModels from '../components/RecommendedModels'
|
||||||
import React from 'react'
|
import React from 'react'
|
||||||
|
|
||||||
|
|
||||||
@@ -301,6 +302,8 @@ export default function Models() {
|
|||||||
}
|
}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
<RecommendedModels addToast={addToast} />
|
||||||
|
|
||||||
{/* Search */}
|
{/* Search */}
|
||||||
<div className="search-bar" style={{ marginBottom: 'var(--spacing-md)' }}>
|
<div className="search-bar" style={{ marginBottom: 'var(--spacing-md)' }}>
|
||||||
<i className="fas fa-search search-icon" />
|
<i className="fas fa-search search-icon" />
|
||||||
|
|||||||
@@ -24,7 +24,37 @@ function formatNumber(n) {
|
|||||||
return String(n)
|
return String(n)
|
||||||
}
|
}
|
||||||
|
|
||||||
function StatCard({ icon, label, value, muted }) {
|
// Opt-in token pricing. LocalAI is self-hosted and has no inherent monetary
|
||||||
|
// cost, but multi-user deployments use estimated cost for chargeback/budgeting.
|
||||||
|
// Prices are admin-supplied $ per 1M tokens, stored locally (per-browser), and
|
||||||
|
// the whole cost surface stays hidden until a non-zero price is set.
|
||||||
|
const TOKEN_PRICING_KEY = 'localai_token_pricing'
|
||||||
|
|
||||||
|
/**
 * Read the stored per-1M-token prices from localStorage.
 *
 * Always returns `{ prompt, completion }` as numbers; anything missing,
 * non-numeric or unreadable (storage unavailable, invalid JSON) becomes 0.
 */
function loadPricing() {
  try {
    const raw = localStorage.getItem(TOKEN_PRICING_KEY) || '{}'
    const stored = JSON.parse(raw)
    const prompt = Number(stored.prompt) || 0
    const completion = Number(stored.completion) || 0
    return { prompt, completion }
  } catch {
    return { prompt: 0, completion: 0 }
  }
}
|
||||||
|
|
||||||
|
/** Persist the pricing object to localStorage as JSON; storage failures are ignored. */
function savePricing(p) {
  try {
    const serialized = JSON.stringify(p)
    localStorage.setItem(TOKEN_PRICING_KEY, serialized)
  } catch {
    /* ignore */
  }
}
|
||||||
|
|
||||||
|
/** True when either the prompt or the completion price is greater than zero. */
function pricingEnabled(p) {
  const promptPrice = p?.prompt || 0
  const completionPrice = p?.completion || 0
  return promptPrice > 0 || completionPrice > 0
}
|
||||||
|
|
||||||
|
/**
 * Estimated cost of a usage row.
 *
 * @param {object} row - Usage row carrying `prompt_tokens` / `completion_tokens`.
 * @param {object} p - Prices in $ per 1M tokens: `{ prompt, completion }`.
 * @returns {number} prompt cost + completion cost.
 *
 * A missing token count or price is treated as 0. Without that, a row with
 * only one of the two counts would produce NaN for the whole sum and the cost
 * of the count that is present would be lost.
 */
function costOf(row, p) {
  const promptTokens = row?.prompt_tokens || 0
  const completionTokens = row?.completion_tokens || 0
  return (promptTokens / 1_000_000) * (p?.prompt || 0)
    + (completionTokens / 1_000_000) * (p?.completion || 0)
}
|
||||||
|
|
||||||
|
/**
 * Render a dollar amount for display: '$0.00' for falsy input (0, NaN,
 * undefined), '<$0.01' for anything below one cent, otherwise the value
 * with two decimals.
 */
function formatCost(n) {
  if (!n) {
    return '$0.00'
  }
  return n < 0.01 ? '<$0.01' : `$${n.toFixed(2)}`
}
|
||||||
|
|
||||||
|
function StatCard({ icon, label, value, muted, text }) {
|
||||||
return (
|
return (
|
||||||
<div className="card" style={{ padding: 'var(--spacing-sm) var(--spacing-md)', flex: '1 1 0', minWidth: 120, opacity: muted ? 0.7 : 1 }}>
|
<div className="card" style={{ padding: 'var(--spacing-sm) var(--spacing-md)', flex: '1 1 0', minWidth: 120, opacity: muted ? 0.7 : 1 }}>
|
||||||
<div style={{ display: 'flex', alignItems: 'center', gap: 6, marginBottom: 2 }}>
|
<div style={{ display: 'flex', alignItems: 'center', gap: 6, marginBottom: 2 }}>
|
||||||
@@ -32,7 +62,7 @@ function StatCard({ icon, label, value, muted }) {
|
|||||||
<span style={{ fontSize: '0.6875rem', color: 'var(--color-text-muted)', fontWeight: 500, textTransform: 'uppercase', letterSpacing: '0.03em' }}>{label}</span>
|
<span style={{ fontSize: '0.6875rem', color: 'var(--color-text-muted)', fontWeight: 500, textTransform: 'uppercase', letterSpacing: '0.03em' }}>{label}</span>
|
||||||
</div>
|
</div>
|
||||||
<div style={{ fontSize: '1.375rem', fontWeight: 700, fontFamily: 'var(--font-mono)', color: muted ? 'var(--color-text-secondary)' : 'var(--color-text-primary)' }}>
|
<div style={{ fontSize: '1.375rem', fontWeight: 700, fontFamily: 'var(--font-mono)', color: muted ? 'var(--color-text-secondary)' : 'var(--color-text-primary)' }}>
|
||||||
{muted ? '~' : ''}{formatNumber(value)}
|
{text != null ? text : `${muted ? '~' : ''}${formatNumber(value)}`}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
@@ -642,6 +672,10 @@ export default function Usage() {
|
|||||||
const [activeTab, setActiveTab] = useState('models')
|
const [activeTab, setActiveTab] = useState('models')
|
||||||
const [quotas, setQuotas] = useState([])
|
const [quotas, setQuotas] = useState([])
|
||||||
const [selectedUserId, setSelectedUserId] = useState(null)
|
const [selectedUserId, setSelectedUserId] = useState(null)
|
||||||
|
const [pricing, setPricingState] = useState(loadPricing)
|
||||||
|
const [showPricing, setShowPricing] = useState(false)
|
||||||
|
const setPricing = (p) => { setPricingState(p); savePricing(p) }
|
||||||
|
const costEnabled = pricingEnabled(pricing)
|
||||||
|
|
||||||
const fetchUsage = useCallback(async () => {
|
const fetchUsage = useCallback(async () => {
|
||||||
setLoading(true)
|
setLoading(true)
|
||||||
@@ -743,11 +777,50 @@ export default function Usage() {
|
|||||||
<i className="fas fa-key" style={{ fontSize: '0.7rem' }} /> {t('usage.sources.tab')}
|
<i className="fas fa-key" style={{ fontSize: '0.7rem' }} /> {t('usage.sources.tab')}
|
||||||
</button>
|
</button>
|
||||||
<div style={{ flex: 1 }} />
|
<div style={{ flex: 1 }} />
|
||||||
|
<button
|
||||||
|
className={`btn btn-sm ${costEnabled ? 'btn-primary' : 'btn-secondary'}`}
|
||||||
|
onClick={() => setShowPricing(v => !v)}
|
||||||
|
style={{ gap: 4 }}
|
||||||
|
title="Set token pricing to estimate cost"
|
||||||
|
>
|
||||||
|
<i className="fas fa-dollar-sign" /> {costEnabled ? 'Pricing' : 'Set pricing'}
|
||||||
|
</button>
|
||||||
<button className="btn btn-secondary btn-sm" onClick={fetchUsage} disabled={loading} style={{ gap: 4 }}>
|
<button className="btn btn-secondary btn-sm" onClick={fetchUsage} disabled={loading} style={{ gap: 4 }}>
|
||||||
<i className={`fas fa-rotate${loading ? ' fa-spin' : ''}`} /> Refresh
|
<i className={`fas fa-rotate${loading ? ' fa-spin' : ''}`} /> Refresh
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{showPricing && (
|
||||||
|
<div className="card" style={{ display: 'flex', alignItems: 'flex-end', gap: 'var(--spacing-md)', flexWrap: 'wrap', padding: 'var(--spacing-md)', marginBottom: 'var(--spacing-md)' }}>
|
||||||
|
<div style={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
|
||||||
|
<label style={{ fontSize: '0.6875rem', color: 'var(--color-text-muted)', textTransform: 'uppercase', letterSpacing: '0.03em' }}>Prompt $/1M tokens</label>
|
||||||
|
<input
|
||||||
|
className="input" type="number" min="0" step="0.01" style={{ width: 140 }}
|
||||||
|
value={pricing.prompt || ''}
|
||||||
|
placeholder="0.00"
|
||||||
|
onChange={e => setPricing({ ...pricing, prompt: Number(e.target.value) || 0 })}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div style={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
|
||||||
|
<label style={{ fontSize: '0.6875rem', color: 'var(--color-text-muted)', textTransform: 'uppercase', letterSpacing: '0.03em' }}>Completion $/1M tokens</label>
|
||||||
|
<input
|
||||||
|
className="input" type="number" min="0" step="0.01" style={{ width: 140 }}
|
||||||
|
value={pricing.completion || ''}
|
||||||
|
placeholder="0.00"
|
||||||
|
onChange={e => setPricing({ ...pricing, completion: Number(e.target.value) || 0 })}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
{costEnabled && (
|
||||||
|
<button className="btn btn-secondary btn-sm" onClick={() => setPricing({ prompt: 0, completion: 0 })} style={{ gap: 4 }}>
|
||||||
|
<i className="fas fa-times" /> Clear
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
<span style={{ fontSize: '0.75rem', color: 'var(--color-text-muted)', flex: '1 1 200px' }}>
|
||||||
|
Estimated cost only. Prices are stored in this browser and applied to recorded token counts.
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
{loading ? (
|
{loading ? (
|
||||||
<div style={{ display: 'flex', justifyContent: 'center', padding: 'var(--spacing-xl)' }}>
|
<div style={{ display: 'flex', justifyContent: 'center', padding: 'var(--spacing-xl)' }}>
|
||||||
<LoadingSpinner size="lg" />
|
<LoadingSpinner size="lg" />
|
||||||
@@ -760,6 +833,9 @@ export default function Usage() {
|
|||||||
<StatCard icon="fas fa-arrow-up" label="Prompt" value={displayTotals.prompt_tokens} />
|
<StatCard icon="fas fa-arrow-up" label="Prompt" value={displayTotals.prompt_tokens} />
|
||||||
<StatCard icon="fas fa-arrow-down" label="Completion" value={displayTotals.completion_tokens} />
|
<StatCard icon="fas fa-arrow-down" label="Completion" value={displayTotals.completion_tokens} />
|
||||||
<StatCard icon="fas fa-coins" label="Total" value={displayTotals.total_tokens} />
|
<StatCard icon="fas fa-coins" label="Total" value={displayTotals.total_tokens} />
|
||||||
|
{costEnabled && (
|
||||||
|
<StatCard icon="fas fa-dollar-sign" label="Est. Cost" text={formatCost(costOf(displayTotals, pricing))} />
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Predictions */}
|
{/* Predictions */}
|
||||||
@@ -789,6 +865,7 @@ export default function Usage() {
|
|||||||
<th style={{ width: 110 }}>Prompt</th>
|
<th style={{ width: 110 }}>Prompt</th>
|
||||||
<th style={{ width: 110 }}>Completion</th>
|
<th style={{ width: 110 }}>Completion</th>
|
||||||
<th style={{ width: 110 }}>Total</th>
|
<th style={{ width: 110 }}>Total</th>
|
||||||
|
{costEnabled && <th style={{ width: 100 }}>Est. Cost</th>}
|
||||||
<th style={{ width: 140 }}></th>
|
<th style={{ width: 140 }}></th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
@@ -800,6 +877,7 @@ export default function Usage() {
|
|||||||
<td style={monoCell}>{formatNumber(row.prompt_tokens)}</td>
|
<td style={monoCell}>{formatNumber(row.prompt_tokens)}</td>
|
||||||
<td style={monoCell}>{formatNumber(row.completion_tokens)}</td>
|
<td style={monoCell}>{formatNumber(row.completion_tokens)}</td>
|
||||||
<td style={{ ...monoCell, fontWeight: 600 }}>{formatNumber(row.total_tokens)}</td>
|
<td style={{ ...monoCell, fontWeight: 600 }}>{formatNumber(row.total_tokens)}</td>
|
||||||
|
{costEnabled && <td style={monoCell}>{formatCost(costOf(row, pricing))}</td>}
|
||||||
<td><UsageBar value={row.total_tokens} max={maxTokens} /></td>
|
<td><UsageBar value={row.total_tokens} max={maxTokens} /></td>
|
||||||
</tr>
|
</tr>
|
||||||
))}
|
))}
|
||||||
@@ -827,6 +905,7 @@ export default function Usage() {
|
|||||||
<th style={{ width: 110 }}>Prompt</th>
|
<th style={{ width: 110 }}>Prompt</th>
|
||||||
<th style={{ width: 110 }}>Completion</th>
|
<th style={{ width: 110 }}>Completion</th>
|
||||||
<th style={{ width: 110 }}>Total</th>
|
<th style={{ width: 110 }}>Total</th>
|
||||||
|
{costEnabled && <th style={{ width: 100 }}>Est. Cost</th>}
|
||||||
<th style={{ width: 110 }}>Proj. Total</th>
|
<th style={{ width: 110 }}>Proj. Total</th>
|
||||||
<th style={{ width: 140 }}></th>
|
<th style={{ width: 140 }}></th>
|
||||||
</tr>
|
</tr>
|
||||||
@@ -849,6 +928,7 @@ export default function Usage() {
|
|||||||
<td style={monoCell}>{formatNumber(row.prompt_tokens)}</td>
|
<td style={monoCell}>{formatNumber(row.prompt_tokens)}</td>
|
||||||
<td style={monoCell}>{formatNumber(row.completion_tokens)}</td>
|
<td style={monoCell}>{formatNumber(row.completion_tokens)}</td>
|
||||||
<td style={{ ...monoCell, fontWeight: 600 }}>{formatNumber(row.total_tokens)}</td>
|
<td style={{ ...monoCell, fontWeight: 600 }}>{formatNumber(row.total_tokens)}</td>
|
||||||
|
{costEnabled && <td style={monoCell}>{formatCost(costOf(row, pricing))}</td>}
|
||||||
<td style={{ ...monoCell, color: 'var(--color-text-muted)', fontStyle: 'italic' }}>
|
<td style={{ ...monoCell, color: 'var(--color-text-muted)', fontStyle: 'italic' }}>
|
||||||
{up?.predictions ? `~${formatNumber(up.predictions.projectedTotals.total_tokens)}` : '-'}
|
{up?.predictions ? `~${formatNumber(up.predictions.projectedTotals.total_tokens)}` : '-'}
|
||||||
</td>
|
</td>
|
||||||
@@ -856,7 +936,7 @@ export default function Usage() {
|
|||||||
</tr>
|
</tr>
|
||||||
{isExpanded && up && (
|
{isExpanded && up && (
|
||||||
<tr>
|
<tr>
|
||||||
<td colSpan={8} style={{ padding: 0, background: 'var(--color-bg-secondary)' }}>
|
<td colSpan={costEnabled ? 9 : 8} style={{ padding: 0, background: 'var(--color-bg-secondary)' }}>
|
||||||
<div style={{ padding: 'var(--spacing-md)' }}>
|
<div style={{ padding: 'var(--spacing-md)' }}>
|
||||||
{up.predictions && (
|
{up.predictions && (
|
||||||
<div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(100px, 1fr))', gap: 'var(--spacing-xs)', marginBottom: 'var(--spacing-sm)' }}>
|
<div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(100px, 1fr))', gap: 'var(--spacing-xs)', marginBottom: 'var(--spacing-sm)' }}>
|
||||||
|
|||||||
@@ -268,7 +268,7 @@ func RegisterAuthRoutes(e *echo.Echo, app *application.Application) {
|
|||||||
// Set up OAuth manager when any OAuth/OIDC provider is configured
|
// Set up OAuth manager when any OAuth/OIDC provider is configured
|
||||||
if appConfig.Auth.GitHubClientID != "" || appConfig.Auth.OIDCClientID != "" {
|
if appConfig.Auth.GitHubClientID != "" || appConfig.Auth.OIDCClientID != "" {
|
||||||
oauthMgr, err := auth.NewOAuthManager(
|
oauthMgr, err := auth.NewOAuthManager(
|
||||||
appConfig.Auth.BaseURL,
|
appConfig.ExternalBaseURL,
|
||||||
auth.OAuthParams{
|
auth.OAuthParams{
|
||||||
GitHubClientID: appConfig.Auth.GitHubClientID,
|
GitHubClientID: appConfig.Auth.GitHubClientID,
|
||||||
GitHubClientSecret: appConfig.Auth.GitHubClientSecret,
|
GitHubClientSecret: appConfig.Auth.GitHubClientSecret,
|
||||||
|
|||||||
@@ -156,7 +156,10 @@ func applyNodeHardwareDefaults(opts *pb.ModelOptions, node *BackendNode) {
|
|||||||
VRAM: node.TotalVRAM,
|
VRAM: node.TotalVRAM,
|
||||||
}
|
}
|
||||||
if config.IsManagedPhysicalBatch(int(opts.NBatch)) {
|
if config.IsManagedPhysicalBatch(int(opts.NBatch)) {
|
||||||
opts.NBatch = int32(config.PhysicalBatch(gpu))
|
// Gate the raised batch on the selected node's per-device VRAM at this
|
||||||
|
// model's context, so a large context can't overflow the node's compute
|
||||||
|
// buffer (issue #10485). node.TotalVRAM is the node's reported ceiling.
|
||||||
|
opts.NBatch = int32(config.PhysicalBatchForContext(gpu, int(opts.ContextSize)))
|
||||||
}
|
}
|
||||||
// Default concurrent serving for the selected node (the frontend that built
|
// Default concurrent serving for the selected node (the frontend that built
|
||||||
// the options may have no GPU). Only adds when no parallel option is set.
|
// the options may have no GPU). Only adds when no parallel option is set.
|
||||||
|
|||||||
@@ -8,12 +8,19 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var _ = Describe("applyNodeHardwareDefaults", func() {
|
var _ = Describe("applyNodeHardwareDefaults", func() {
|
||||||
It("raises a managed default batch on a Blackwell node", func() {
|
It("raises a managed default batch on a Blackwell node with headroom", func() {
|
||||||
opts := &pb.ModelOptions{NBatch: config.DefaultPhysicalBatch}
|
opts := &pb.ModelOptions{NBatch: config.DefaultPhysicalBatch, ContextSize: 8192}
|
||||||
applyNodeHardwareDefaults(opts, &BackendNode{GPUComputeCapability: "12.1"})
|
applyNodeHardwareDefaults(opts, &BackendNode{GPUComputeCapability: "12.1", TotalVRAM: 119 << 30})
|
||||||
Expect(opts.NBatch).To(BeEquivalentTo(config.BlackwellPhysicalBatch))
|
Expect(opts.NBatch).To(BeEquivalentTo(config.BlackwellPhysicalBatch))
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("keeps the default batch when a large context would overflow the node", func() {
|
||||||
|
// Regression guard for issue #10485 on the distributed path.
|
||||||
|
opts := &pb.ModelOptions{NBatch: config.DefaultPhysicalBatch, ContextSize: 204800}
|
||||||
|
applyNodeHardwareDefaults(opts, &BackendNode{GPUComputeCapability: "12.0", TotalVRAM: 16 << 30})
|
||||||
|
Expect(opts.NBatch).To(BeEquivalentTo(config.DefaultPhysicalBatch))
|
||||||
|
})
|
||||||
|
|
||||||
It("resets a Blackwell guess on a non-Blackwell node", func() {
|
It("resets a Blackwell guess on a non-Blackwell node", func() {
|
||||||
// frontend (Blackwell) guessed high, but the selected node is not Blackwell
|
// frontend (Blackwell) guessed high, but the selected node is not Blackwell
|
||||||
opts := &pb.ModelOptions{NBatch: config.BlackwellPhysicalBatch}
|
opts := &pb.ModelOptions{NBatch: config.BlackwellPhysicalBatch}
|
||||||
|
|||||||
@@ -494,6 +494,39 @@ These llama.cpp options are passed through the `options:` array.
|
|||||||
| `direct_io` / `use_direct_io` | bool | `false` | Open the model with `O_DIRECT` (faster cold loads on NVMe; ignored if not supported). |
|
| `direct_io` / `use_direct_io` | bool | `false` | Open the model with `O_DIRECT` (faster cold loads on NVMe; ignored if not supported). |
|
||||||
| `verbosity` | int | `3` | llama.cpp internal log verbosity threshold. Higher = more verbose. |
|
| `verbosity` | int | `3` | llama.cpp internal log verbosity threshold. Higher = more verbose. |
|
||||||
| `override_tensor` / `tensor_buft_overrides` | string | "" | Per-tensor buffer-type overrides for the main model. Format: `<tensor regex>=<buffer type>,<tensor regex>=<buffer type>,...`. Mirrors the existing `draft_override_tensor` syntax for the draft model. |
|
| `override_tensor` / `tensor_buft_overrides` | string | "" | Per-tensor buffer-type overrides for the main model. Format: `<tensor regex>=<buffer type>,<tensor regex>=<buffer type>,...`. Mirrors the existing `draft_override_tensor` syntax for the draft model. |
|
||||||
|
| `cpu_moe` | bool | `false` | Keep all MoE expert weights of the main model on CPU (upstream `--cpu-moe`). Frees VRAM on large MoE models (DeepSeek, Qwen3 `*-A3B`). |
|
||||||
|
| `n_cpu_moe` | int | `0` | Keep MoE expert weights of the first N main-model layers on CPU (upstream `--n-cpu-moe`). |
|
||||||
|
|
||||||
|
#### Generic option passthrough
|
||||||
|
|
||||||
|
Any `options:` entry whose name starts with `-` is forwarded **verbatim** to
|
||||||
|
upstream llama.cpp's own `llama-server` argument parser. This means any flag the
|
||||||
|
bundled llama.cpp supports works without LocalAI needing a dedicated option,
|
||||||
|
even ones added after your LocalAI version was built. See the upstream
|
||||||
|
[server flags reference](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md).
|
||||||
|
|
||||||
|
Format mirrors the rest of the array - `--flag` for a boolean, or `--flag:value`
|
||||||
|
for a flag that takes a value. Everything after the first `:` is the value, so
|
||||||
|
embedded colons (e.g. `host:port`) are preserved:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
options:
|
||||||
|
- "--cpu-moe" # boolean flag
|
||||||
|
- "--n-cpu-moe:4" # flag with a value
|
||||||
|
- "--override-tensor:exps=CPU"
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
|
||||||
|
- **Precedence:** passthrough flags are applied last, so an explicit flag
|
||||||
|
overrides the LocalAI option it maps to (e.g. `--ctx-size:8192` overrides
|
||||||
|
`context_size`).
|
||||||
|
- **Power-user territory:** an invalid flag or value is rejected by the upstream
|
||||||
|
parser exactly as it would be by `llama-server`, which can fail model loading.
|
||||||
|
Prefer the named options above when one exists.
|
||||||
|
- Flags that would terminate the process (such as `--help`, `--usage`,
|
||||||
|
`--version`, `--license`, `--list-devices`, `--cache-list`, and
|
||||||
|
`--completion*`) are ignored.
|
||||||
|
|
||||||
### Prompt Caching
|
### Prompt Caching
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,26 @@ When running LocalAI behind a TLS termination reverse proxy, the Web UI may fail
|
|||||||
|
|
||||||
LocalAI uses the `X-Forwarded-Proto` HTTP header to determine the protocol used by clients. When this header is set to `https`, LocalAI will generate HTTPS URLs for static assets in the Web UI.
|
LocalAI uses the `X-Forwarded-Proto` HTTP header to determine the protocol used by clients. When this header is set to `https`, LocalAI will generate HTTPS URLs for static assets in the Web UI.
|
||||||
|
|
||||||
|
## Running behind a reverse proxy (HTTPS / subpath)
|
||||||
|
|
||||||
|
LocalAI does not terminate TLS itself, so HTTPS is provided by a reverse
|
||||||
|
proxy in front of it. Self-referential links (generated image and video
|
||||||
|
URLs, async job status URLs, OAuth callbacks) need the externally visible
|
||||||
|
scheme, host and port.
|
||||||
|
|
||||||
|
LocalAI determines these in this order:
|
||||||
|
|
||||||
|
1. `LOCALAI_BASE_URL` - if set, it is authoritative for the origin. Set it to
|
||||||
|
the externally visible base URL, e.g. `LOCALAI_BASE_URL=https://localai.example.com`
|
||||||
|
or `https://192.168.0.13:34567`. Recommended whenever links come back with
|
||||||
|
the wrong scheme or host.
|
||||||
|
2. Otherwise, the `X-Forwarded-Proto` and `X-Forwarded-Host` headers (or the
|
||||||
|
RFC 7239 `Forwarded` header) sent by the proxy. Ensure your proxy forwards
|
||||||
|
`X-Forwarded-Proto: https`.
|
||||||
|
|
||||||
|
A reverse-proxy subpath mount is supported via `X-Forwarded-Prefix`; the prefix is
|
||||||
|
appended to `LOCALAI_BASE_URL` when both are present.
|
||||||
|
|
||||||
## Required Headers
|
## Required Headers
|
||||||
|
|
||||||
Your reverse proxy must forward these headers to LocalAI:
|
Your reverse proxy must forward these headers to LocalAI:
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
{
|
{
|
||||||
"version": "v4.4.3"
|
"version": "v4.5.0"
|
||||||
}
|
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user