mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-25 17:12:10 -04:00
Compare commits
38 Commits
v4.5.0
...
feat/darwi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
44b4b70991 | ||
|
|
286c508ce0 | ||
|
|
7652f328ac | ||
|
|
d5275fcef4 | ||
|
|
d1a9d59917 | ||
|
|
f72046b5b5 | ||
|
|
79783120dd | ||
|
|
4ac67d255d | ||
|
|
3a87d9e48f | ||
|
|
693e3eec05 | ||
|
|
f1e5071321 | ||
|
|
93d6255de3 | ||
|
|
fe4f425fb5 | ||
|
|
fae9f6356f | ||
|
|
066abf82c0 | ||
|
|
a7fec9a49d | ||
|
|
c678530cf0 | ||
|
|
3c63431e46 | ||
|
|
3f647a2764 | ||
|
|
f88981cdce | ||
|
|
0d6de15ae9 | ||
|
|
5c3d48ab50 | ||
|
|
764b0352b9 | ||
|
|
75ba2daba1 | ||
|
|
62b14fd635 | ||
|
|
193d0e6aef | ||
|
|
482314c623 | ||
|
|
e8ae88a2a0 | ||
|
|
e1994579f8 | ||
|
|
e5620989dd | ||
|
|
fc618dcee6 | ||
|
|
e6042080c0 | ||
|
|
0f3b24436d | ||
|
|
4b6f911835 | ||
|
|
a5e28942a6 | ||
|
|
dba9cd7ca4 | ||
|
|
c93190de50 | ||
|
|
4dbf69f889 |
@@ -17,19 +17,29 @@ if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
|
||||
rm -rf /LocalAI/backend/cpp/llama-cpp-*-build
|
||||
fi
|
||||
|
||||
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
make llama-cpp-fallback
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
if [ -z "${BUILD_TYPE:-}" ]; then
|
||||
# Pure CPU image (BUILD_TYPE empty): one build with ggml CPU_ALL_VARIANTS replaces the
|
||||
# per-microarch binaries (x86: avx/avx2/avx512/fallback; arm64: armv8.x/armv9.x). ggml
|
||||
# dlopens the best libggml-cpu-*.so at runtime by probing host CPU features.
|
||||
#
|
||||
# arm64: the CPU_ALL_VARIANTS table includes armv9.2 SME variants whose -march=...+sme is
|
||||
# rejected by the Ubuntu 24.04 default gcc-13. gcc-14 accepts it, so build the arm64
|
||||
# variants with it (the host never *selects* SME unless it has it, but every variant must
|
||||
# still compile).
|
||||
if [ "${TARGETARCH}" = "arm64" ]; then
|
||||
apt-get update -qq && apt-get install -y -qq gcc-14 g++-14
|
||||
export CC=gcc-14 CXX=g++-14
|
||||
fi
|
||||
make llama-cpp-cpu-all
|
||||
else
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
make llama-cpp-avx
|
||||
make llama-cpp-avx2
|
||||
make llama-cpp-avx512
|
||||
# GPU build (cublas/hipblas/sycl/vulkan/...): the accelerator does the compute, so a
|
||||
# single fallback CPU build is enough - no per-microarch CPU variants needed. (This also
|
||||
# keeps the heavy GPU backend compile from also building the whole CPU variant matrix,
|
||||
# and avoids the gcc-14 apt step on GPU base images such as nvidia l4t.)
|
||||
make llama-cpp-fallback
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
fi
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
|
||||
ccache -s || true
|
||||
|
||||
@@ -19,17 +19,21 @@ fi
|
||||
|
||||
cd /LocalAI/backend/cpp/turboquant
|
||||
|
||||
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then
|
||||
make turboquant-fallback
|
||||
make turboquant-grpc
|
||||
make turboquant-rpc-server
|
||||
if [ -z "${BUILD_TYPE:-}" ]; then
|
||||
# Pure CPU image: one ggml CPU_ALL_VARIANTS build replaces the per-microarch binaries.
|
||||
# arm64: the armv9.2 SME variants need gcc-14 (gcc-13 rejects +sme).
|
||||
if [ "${TARGETARCH}" = "arm64" ]; then
|
||||
apt-get update -qq && apt-get install -y -qq gcc-14 g++-14
|
||||
export CC=gcc-14 CXX=g++-14
|
||||
fi
|
||||
make turboquant-cpu-all
|
||||
else
|
||||
make turboquant-avx
|
||||
make turboquant-avx2
|
||||
make turboquant-avx512
|
||||
# GPU build (cublas/hipblas/sycl/vulkan/...): single fallback CPU build, the accelerator
|
||||
# does the compute. Keeps the GPU compile from also building the CPU variant matrix and
|
||||
# avoids the gcc-14 apt step on GPU base images such as nvidia l4t.
|
||||
make turboquant-fallback
|
||||
make turboquant-grpc
|
||||
make turboquant-rpc-server
|
||||
fi
|
||||
make turboquant-grpc
|
||||
make turboquant-rpc-server
|
||||
|
||||
ccache -s || true
|
||||
|
||||
48
.github/backend-matrix.yml
vendored
48
.github/backend-matrix.yml
vendored
@@ -4922,6 +4922,37 @@ includeDarwin:
|
||||
tag-suffix: "-metal-darwin-arm64-vibevoice-cpp"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
# Vision/utility C++/ggml backends (go+cgo). Their Makefiles already carry a
|
||||
# Darwin/Metal path (GGML_METAL=ON when build-type=metal); this just builds and
|
||||
# publishes the metal image so Apple Silicon can install them.
|
||||
- backend: "depth-anything-cpp"
|
||||
tag-suffix: "-metal-darwin-arm64-depth-anything-cpp"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "locate-anything-cpp"
|
||||
tag-suffix: "-metal-darwin-arm64-locate-anything-cpp"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "rfdetr-cpp"
|
||||
tag-suffix: "-metal-darwin-arm64-rfdetr-cpp"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "sam3-cpp"
|
||||
tag-suffix: "-metal-darwin-arm64-sam3-cpp"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
# privacy-filter (PII/NER) is a C++/ggml backend built by a bespoke darwin
|
||||
# script (make backends/privacy-filter-darwin); ggml defaults Metal ON on Apple
|
||||
# so the build is Metal-enabled. lang=go drives runner/toolchain selection only.
|
||||
- backend: "privacy-filter"
|
||||
tag-suffix: "-metal-darwin-arm64-privacy-filter"
|
||||
lang: "go"
|
||||
# LocalVQE has no Metal path; on Apple Silicon it builds CPU-only (GGML_METAL
|
||||
# OFF) but is still a native arm64 image. Uses the darwin/metal build profile.
|
||||
- backend: "localvqe"
|
||||
tag-suffix: "-metal-darwin-arm64-localvqe"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "voxtral"
|
||||
tag-suffix: "-metal-darwin-arm64-voxtral"
|
||||
build-type: "metal"
|
||||
@@ -4974,6 +5005,19 @@ includeDarwin:
|
||||
- backend: "kitten-tts"
|
||||
tag-suffix: "-metal-darwin-arm64-kitten-tts"
|
||||
build-type: "mps"
|
||||
# vLLM on Apple Silicon via vllm-metal (MLX). The install is custom
|
||||
# (backend/python/vllm/install.sh has a darwin branch); lang stays python so
|
||||
# backend_build_darwin.yml drives it through build-darwin-python-backend ->
|
||||
# scripts/build/python-darwin.sh, which runs the backend's install.sh.
|
||||
- backend: "vllm"
|
||||
tag-suffix: "-metal-darwin-arm64-vllm"
|
||||
build-type: "mps"
|
||||
- backend: "trl"
|
||||
tag-suffix: "-metal-darwin-arm64-trl"
|
||||
build-type: "mps"
|
||||
- backend: "liquid-audio"
|
||||
tag-suffix: "-metal-darwin-arm64-liquid-audio"
|
||||
build-type: "mps"
|
||||
- backend: "piper"
|
||||
tag-suffix: "-metal-darwin-arm64-piper"
|
||||
build-type: "metal"
|
||||
@@ -4990,6 +5034,10 @@ includeDarwin:
|
||||
tag-suffix: "-metal-darwin-arm64-sherpa-onnx"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "supertonic"
|
||||
tag-suffix: "-metal-darwin-arm64-supertonic"
|
||||
build-type: "metal"
|
||||
lang: "go"
|
||||
- backend: "local-store"
|
||||
tag-suffix: "-metal-darwin-arm64-local-store"
|
||||
build-type: "metal"
|
||||
|
||||
55
.github/bump_vllm_metal.sh
vendored
Executable file
55
.github/bump_vllm_metal.sh
vendored
Executable file
@@ -0,0 +1,55 @@
|
||||
#!/bin/bash
|
||||
# Bump the single vllm-metal pin (VLLM_METAL_VERSION) in the vLLM backend's
|
||||
# darwin (Apple Silicon) install path. The macOS/Metal build
|
||||
# (backend/python/vllm/install.sh, Darwin branch) installs vllm-metal, which is
|
||||
# version-locked to a specific vLLM source release. install.sh derives that vLLM
|
||||
# version at build time from vllm-metal's own installer (`vllm_v=`) at the pinned
|
||||
# tag, so there is only ONE value to bump here -- mirroring bump_vllm_wheel.sh,
|
||||
# which bumps the Linux cu130 wheel pin.
|
||||
#
|
||||
# This deliberately tracks vllm-project/vllm-metal, NOT vllm-project/vllm: the
|
||||
# darwin build can only use the exact vLLM version vllm-metal supports, so it may
|
||||
# lag the Linux pin (requirements-cublas13-after.txt) until vllm-metal catches up.
|
||||
set -xe
|
||||
REPO=$1 # vllm-project/vllm-metal
|
||||
FILE=$2 # backend/python/vllm/install.sh
|
||||
VAR=$3 # VLLM_METAL_VERSION (used for the workflow's output file names)
|
||||
|
||||
if [ -z "$FILE" ] || [ -z "$REPO" ] || [ -z "$VAR" ]; then
|
||||
echo "usage: $0 <repo> <install-file> <var-name>" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# vllm-metal ships frequent dev releases, all flagged as non-prerelease, so
|
||||
# /releases/latest returns the newest one (with its cp312 wheel asset).
|
||||
LATEST_TAG=$(curl -sS -H "Accept: application/vnd.github+json" \
|
||||
"https://api.github.com/repos/$REPO/releases/latest" \
|
||||
| python3 -c "import json,sys; print(json.load(sys.stdin)['tag_name'])")
|
||||
|
||||
# The coupled vLLM source version lives in vllm-metal's installer at that tag.
|
||||
NEW_VLLM_VERSION=$(curl -fsSL \
|
||||
"https://raw.githubusercontent.com/$REPO/$LATEST_TAG/install.sh" \
|
||||
| grep -oE 'vllm_v="[0-9]+\.[0-9]+\.[0-9]+"' | head -1 | cut -d'"' -f2)
|
||||
|
||||
if [ -z "$LATEST_TAG" ] || [ -z "$NEW_VLLM_VERSION" ]; then
|
||||
echo "Could not resolve vllm-metal tag ($LATEST_TAG) or its vllm_v ($NEW_VLLM_VERSION)." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
set +e
|
||||
CURRENT_TAG=$(grep -oE 'VLLM_METAL_VERSION="[^"]*"' "$FILE" | head -1 | cut -d'"' -f2)
|
||||
set -e
|
||||
|
||||
# Rewrite the single pin. install.sh derives VLLM_VERSION from this tag at build
|
||||
# time, so there is nothing else to touch. peter-evans/create-pull-request opens
|
||||
# no PR on a clean tree, so a no-op rewrite (already current) is safe.
|
||||
sed -i "$FILE" \
|
||||
-e "s|VLLM_METAL_VERSION=\"[^\"]*\"|VLLM_METAL_VERSION=\"$LATEST_TAG\"|"
|
||||
|
||||
if [ -z "$CURRENT_TAG" ]; then
|
||||
echo "Could not find VLLM_METAL_VERSION=\"...\" in $FILE." >&2
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "vllm-metal ${CURRENT_TAG} -> ${LATEST_TAG} (builds vLLM ${NEW_VLLM_VERSION}): https://github.com/$REPO/releases/tag/${LATEST_TAG}" >> "${VAR}_message.txt"
|
||||
echo "${LATEST_TAG}" >> "${VAR}_commit.txt"
|
||||
11
.github/workflows/backend_build_darwin.yml
vendored
11
.github/workflows/backend_build_darwin.yml
vendored
@@ -228,8 +228,17 @@ jobs:
|
||||
run: |
|
||||
make backends/ds4-darwin
|
||||
|
||||
# privacy-filter is a C++/ggml backend like ds4 - a single grpc-server with
|
||||
# otool dylib bundling - so it gets its own bespoke darwin script rather than
|
||||
# the generic build-darwin-go-backend path.
|
||||
- name: Build privacy-filter backend (Darwin Metal)
|
||||
if: inputs.backend == 'privacy-filter'
|
||||
run: |
|
||||
make protogen-go
|
||||
make backends/privacy-filter-darwin
|
||||
|
||||
- name: Build ${{ inputs.backend }}-darwin
|
||||
if: inputs.backend != 'llama-cpp' && inputs.backend != 'ds4'
|
||||
if: inputs.backend != 'llama-cpp' && inputs.backend != 'ds4' && inputs.backend != 'privacy-filter'
|
||||
run: |
|
||||
make protogen-go
|
||||
BACKEND=${{ inputs.backend }} BUILD_TYPE=${{ inputs.build-type }} USE_PIP=${{ inputs.use-pip }} make build-darwin-${{ inputs.lang }}-backend
|
||||
|
||||
36
.github/workflows/bump_deps.yaml
vendored
36
.github/workflows/bump_deps.yaml
vendored
@@ -154,3 +154,39 @@ jobs:
|
||||
branch: "update/VLLM_VERSION"
|
||||
body: ${{ steps.bump.outputs.message }}
|
||||
signoff: true
|
||||
|
||||
bump-vllm-metal:
|
||||
# The darwin (Apple Silicon) vLLM build installs vllm-metal, which is locked
|
||||
# to a specific vLLM source release. install.sh pins both VLLM_METAL_VERSION
|
||||
# (the wheel release) and VLLM_VERSION (the vLLM it builds against); this job
|
||||
# tracks vllm-project/vllm-metal and rewrites both atomically. Separate from
|
||||
# bump-vllm-wheel because darwin follows vllm-metal, not vllm/vllm latest.
|
||||
if: github.repository == 'mudler/LocalAI'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v7
|
||||
- name: Bump vllm-metal pin 🔧
|
||||
id: bump
|
||||
run: |
|
||||
bash .github/bump_vllm_metal.sh vllm-project/vllm-metal backend/python/vllm/install.sh VLLM_METAL_VERSION
|
||||
{
|
||||
echo 'message<<EOF'
|
||||
cat "VLLM_METAL_VERSION_message.txt"
|
||||
echo EOF
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo 'commit<<EOF'
|
||||
cat "VLLM_METAL_VERSION_commit.txt"
|
||||
echo EOF
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
rm -rfv VLLM_METAL_VERSION_message.txt VLLM_METAL_VERSION_commit.txt
|
||||
- name: Create Pull Request
|
||||
uses: peter-evans/create-pull-request@v8
|
||||
with:
|
||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||
push-to-fork: ci-forks/LocalAI
|
||||
commit-message: ':arrow_up: Update vllm-project/vllm-metal (darwin)'
|
||||
title: 'chore: :arrow_up: Update vllm-metal (darwin) to `${{ steps.bump.outputs.commit }}`'
|
||||
branch: "update/VLLM_METAL_VERSION"
|
||||
body: ${{ steps.bump.outputs.message }}
|
||||
signoff: true
|
||||
|
||||
6
Makefile
6
Makefile
@@ -1,5 +1,5 @@
|
||||
# Disable parallel execution for backend builds
|
||||
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/crispasr backends/parakeet-cpp backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/rfdetr-cpp backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/omnivoice-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio backends/supertonic backends/depth-anything-cpp backends/privacy-filter
|
||||
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/turboquant backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/crispasr backends/parakeet-cpp backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/rfdetr-cpp backends/insightface backends/speaker-recognition backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/sglang backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp backends/qwen3-tts-cpp backends/omnivoice-cpp backends/vibevoice-cpp backends/localvqe backends/tinygrad backends/sherpa-onnx backends/ds4 backends/ds4-darwin backends/liquid-audio backends/supertonic backends/depth-anything-cpp backends/privacy-filter backends/privacy-filter-darwin
|
||||
|
||||
GOCMD=go
|
||||
GOTEST=$(GOCMD) test
|
||||
@@ -1129,6 +1129,10 @@ backends/ds4-darwin: build
|
||||
bash ./scripts/build/ds4-darwin.sh
|
||||
./local-ai backends install "ocifile://$(abspath ./backend-images/ds4.tar)"
|
||||
|
||||
backends/privacy-filter-darwin: build
|
||||
bash ./scripts/build/privacy-filter-darwin.sh
|
||||
./local-ai backends install "ocifile://$(abspath ./backend-images/privacy-filter.tar)"
|
||||
|
||||
build-darwin-python-backend: build
|
||||
bash ./scripts/build/python-darwin.sh
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=6c00e87ac84404af588ad2e65935bd6f079c696f
|
||||
IK_LLAMA_VERSION?=d5507e33ae7ee2b7b41475f08044d3bde3b839ee
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -50,8 +50,13 @@ add_custom_command(
|
||||
"${hw_proto}"
|
||||
DEPENDS "${hw_proto}")
|
||||
|
||||
# hw_grpc_proto
|
||||
add_library(hw_grpc_proto
|
||||
# hw_grpc_proto: force STATIC. Under the CPU_ALL_VARIANTS build BUILD_SHARED_LIBS=ON
|
||||
# (ggml/llama become shared), which would otherwise make this glue library a DSO. As a
|
||||
# DSO it references the hidden-visibility symbols in the static libprotobuf.a, which the
|
||||
# linker cannot satisfy ("hidden symbol ... in libprotobuf.a is referenced by DSO").
|
||||
# Keeping it STATIC links protobuf/gRPC directly into the grpc-server executable while
|
||||
# only ggml/llama stay shared. No effect on the static variants (already BUILD_SHARED_LIBS=OFF).
|
||||
add_library(hw_grpc_proto STATIC
|
||||
${hw_grpc_srcs}
|
||||
${hw_grpc_hdrs}
|
||||
${hw_proto_srcs}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=73618f27a801c0b8614ceaf3547d3c2a99baae14
|
||||
LLAMA_VERSION?=8be759e6f70d629638a7eb70db3824cbdcea370b
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
@@ -10,8 +10,16 @@ TARGET?=--target grpc-server
|
||||
JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)
|
||||
ARCH?=$(shell uname -m)
|
||||
|
||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
|
||||
# Shared libs default to OFF: we link static gRPC and the avx/avx2/avx512/fallback
|
||||
# variants are fully static. The CPU_ALL_VARIANTS build flips SHARED_LIBS=ON (ggml/llama
|
||||
# become shared so the dynamic CPU backends work; gRPC stays static via its imported
|
||||
# targets). SHARED_LIBS is a make variable, not an appended -D, so it survives the
|
||||
# recursive sub-make into the VARIANT build dir (which re-parses this Makefile) instead
|
||||
# of being re-clobbered by a second -DBUILD_SHARED_LIBS=OFF. EXTRA_CMAKE_ARGS is the hook
|
||||
# the CPU_ALL_VARIANTS target uses to inject -DGGML_BACKEND_DL/-DGGML_CPU_ALL_VARIANTS.
|
||||
SHARED_LIBS?=OFF
|
||||
EXTRA_CMAKE_ARGS?=
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=$(SHARED_LIBS) -DLLAMA_CURL=OFF $(EXTRA_CMAKE_ARGS)
|
||||
|
||||
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
ifeq ($(NATIVE),false)
|
||||
@@ -120,6 +128,30 @@ llama-cpp-fallback: llama.cpp
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
|
||||
|
||||
# Single-build CPU backend using ggml's CPU_ALL_VARIANTS. Produces ONE grpc-server
|
||||
# plus a set of dlopen-able libggml-cpu-*.so (sandybridge/haswell/skylakex/...) that
|
||||
# ggml's backend registry selects from at runtime by probing host CPU features.
|
||||
# Replaces the avx/avx2/avx512/fallback multi-binary build on x86.
|
||||
#
|
||||
# CPU_ALL_VARIANTS requires GGML_BACKEND_DL, which requires BUILD_SHARED_LIBS=ON, so we
|
||||
# pass SHARED_LIBS=ON and the DL flags as make variables (NOT pre-expanded into the
|
||||
# CMAKE_ARGS env string): command-line make variables propagate through every recursive
|
||||
# sub-make, so the deepest VARIANT-dir build computes BUILD_SHARED_LIBS=ON consistently.
|
||||
# Only ggml/llama go shared - gRPC is found via its static imported targets, so the
|
||||
# grpc-server binary keeps static gRPC and only dynamically links ggml.
|
||||
#
|
||||
# TARGET adds "ggml": the per-microarch backends are runtime-dlopened, not link deps of
|
||||
# grpc-server, so they only build because each is an add_dependencies() of the ggml target.
|
||||
llama-cpp-cpu-all: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:cpu-all-variants${RESET})
|
||||
$(MAKE) SHARED_LIBS=ON EXTRA_CMAKE_ARGS="-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON" TARGET="--target grpc-server --target ggml" VARIANT="llama-cpp-cpu-all-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build/grpc-server llama-cpp-cpu-all
|
||||
rm -rf ggml-shared-libs && mkdir -p ggml-shared-libs
|
||||
find $(CURRENT_MAKEFILE_DIR)/../llama-cpp-cpu-all-build/llama.cpp/build \( -name '*.so*' -o -name '*.dylib' \) -exec cp -av {} ggml-shared-libs/ \;
|
||||
@echo "Collected ggml shared backends:" && ls -la ggml-shared-libs/
|
||||
|
||||
llama-cpp-grpc: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include "backend.pb.h"
|
||||
#include "backend.grpc.pb.h"
|
||||
#include "common.h"
|
||||
#include "arg.h"
|
||||
#include "chat-auto-parser.h"
|
||||
#include <getopt.h>
|
||||
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
||||
@@ -592,6 +593,10 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
params.checkpoint_min_step = 256;
|
||||
#endif
|
||||
|
||||
// Raw upstream llama-server flags collected from any option entry that
|
||||
// starts with '-'. Applied once after the loop via common_params_parse.
|
||||
std::vector<std::string> extra_argv;
|
||||
|
||||
// decode options. Options are in form optname:optvale, or if booleans only optname.
|
||||
for (int i = 0; i < request->options_size(); i++) {
|
||||
std::string opt = request->options(i);
|
||||
@@ -1080,6 +1085,31 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
} catch (...) {}
|
||||
}
|
||||
|
||||
// --- main model MoE on CPU (upstream --cpu-moe / --n-cpu-moe) ---
|
||||
} else if (!strcmp(optname, "cpu_moe")) {
|
||||
// Bool-style flag: keep all MoE expert weights on CPU.
|
||||
const bool enable = (optval == NULL) ||
|
||||
optval_str == "true" || optval_str == "1" || optval_str == "yes" ||
|
||||
optval_str == "on" || optval_str == "enabled";
|
||||
if (enable) {
|
||||
params.tensor_buft_overrides.push_back(llm_ffn_exps_cpu_override());
|
||||
}
|
||||
} else if (!strcmp(optname, "n_cpu_moe")) {
|
||||
if (optval != NULL) {
|
||||
try {
|
||||
int n = std::stoi(optval_str);
|
||||
if (n < 0) n = 0;
|
||||
// Keep override-name storage alive for the lifetime of the
|
||||
// params struct (mirrors upstream arg.cpp's function-local static).
|
||||
static std::list<std::string> buft_overrides_main;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
buft_overrides_main.push_back(llm_ffn_exps_block_regex(i));
|
||||
params.tensor_buft_overrides.push_back(
|
||||
{buft_overrides_main.back().c_str(), ggml_backend_cpu_buffer_type()});
|
||||
}
|
||||
} catch (...) {}
|
||||
}
|
||||
|
||||
// --- draft model tensor buffer overrides (upstream --spec-draft-override-tensor) ---
|
||||
} else if (!strcmp(optname, "draft_override_tensor") || !strcmp(optname, "spec_draft_override_tensor")) {
|
||||
// Format: <tensor regex>=<buffer type>,<tensor regex>=<buffer type>,...
|
||||
@@ -1111,6 +1141,30 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
else { cur.push_back(c); }
|
||||
}
|
||||
if (!cur.empty()) flush(cur);
|
||||
|
||||
// --- generic passthrough: any entry starting with '-' is a raw
|
||||
// upstream llama-server flag, forwarded verbatim to the parser. ---
|
||||
} else if (optname[0] == '-') {
|
||||
std::string flag = optname;
|
||||
// These flags make upstream's parser exit() (printing usage /
|
||||
// completion), which would kill the backend process. Skip them.
|
||||
if (flag == "-h" || flag == "--help" || flag == "--usage" ||
|
||||
flag == "--version" || flag == "--license" ||
|
||||
flag == "--list-devices" || flag == "-cl" ||
|
||||
flag == "--cache-list" ||
|
||||
flag.rfind("--completion", 0) == 0) {
|
||||
fprintf(stderr,
|
||||
"[llama-cpp] ignoring passthrough flag that would exit: %s\n",
|
||||
flag.c_str());
|
||||
} else {
|
||||
extra_argv.push_back(flag);
|
||||
// Preserve the whole value after the first ':' so embedded
|
||||
// colons (e.g. host:port) survive strtok's truncation of optval.
|
||||
auto colon = opt.find(':');
|
||||
if (colon != std::string::npos) {
|
||||
extra_argv.push_back(opt.substr(colon + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1146,27 +1200,6 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
}
|
||||
}
|
||||
|
||||
if (!params.kv_overrides.empty()) {
|
||||
params.kv_overrides.emplace_back();
|
||||
params.kv_overrides.back().key[0] = 0;
|
||||
}
|
||||
|
||||
// tensor_buft_overrides sentinel termination (mirrors upstream common/arg.cpp).
|
||||
// Real entries are pushed during option parsing; here we pad/terminate so the
|
||||
// model loader sees back().pattern == nullptr (GGML_ASSERT at common.cpp:1543)
|
||||
// and so llama_params_fit has the placeholder slots it requires.
|
||||
{
|
||||
const size_t ntbo = llama_max_tensor_buft_overrides();
|
||||
while (params.tensor_buft_overrides.size() < ntbo) {
|
||||
params.tensor_buft_overrides.push_back({nullptr, nullptr});
|
||||
}
|
||||
}
|
||||
// Terminate the draft tensor_buft_overrides list with a sentinel, mirroring
|
||||
// the main-model handling above.
|
||||
if (!params.speculative.draft.tensor_buft_overrides.empty()) {
|
||||
params.speculative.draft.tensor_buft_overrides.push_back({nullptr, nullptr});
|
||||
}
|
||||
|
||||
// TODO: Add yarn
|
||||
|
||||
if (!request->tensorsplit().empty()) {
|
||||
@@ -1259,6 +1292,69 @@ static void params_parse(server_context& /*ctx_server*/, const backend::ModelOpt
|
||||
params.sampling.grammar_triggers.push_back(std::move(trigger));
|
||||
}
|
||||
}
|
||||
|
||||
// Apply any raw upstream flags last so an explicit passthrough flag wins
|
||||
// over the LocalAI-resolved field it maps to (e.g. --ctx-size beats
|
||||
// context_size). This is the same parser llama-server itself uses.
|
||||
if (!extra_argv.empty()) {
|
||||
// common_params_parser_init resets a few fields for the SERVER example
|
||||
// (n_parallel -> -1, use_color). Snapshot n_parallel so an unrelated
|
||||
// passthrough flag can't silently clobber LocalAI's resolved value.
|
||||
const int saved_n_parallel = params.n_parallel;
|
||||
|
||||
std::vector<char *> argv;
|
||||
std::string prog = "llama-server";
|
||||
argv.push_back(prog.data());
|
||||
for (auto & a : extra_argv) {
|
||||
argv.push_back(a.data());
|
||||
}
|
||||
|
||||
// ctx_arg.params is a reference, so this overlays the given flags onto
|
||||
// `params` in place. Returns false on a recoverable parse error (and
|
||||
// self-restores params); may exit() on a hard error, exactly as
|
||||
// passing the same bad flag to llama-server would.
|
||||
if (!common_params_parse((int)argv.size(), argv.data(), params,
|
||||
LLAMA_EXAMPLE_SERVER)) {
|
||||
fprintf(stderr,
|
||||
"[llama-cpp] failed to parse passthrough options; ignoring them\n");
|
||||
}
|
||||
|
||||
// Restore n_parallel unless a passthrough flag explicitly set it
|
||||
// (parser_init's reset sentinel for SERVER is -1).
|
||||
if (params.n_parallel == -1) {
|
||||
params.n_parallel = saved_n_parallel;
|
||||
}
|
||||
}
|
||||
|
||||
// Terminate/pad the override vectors only after BOTH the named-option loop
|
||||
// and the generic passthrough (common_params_parse above) have pushed their
|
||||
// real entries, so back() is the null sentinel the model loader asserts on.
|
||||
// Running these before the passthrough let a passthrough flag (--cpu-moe,
|
||||
// --override-tensor, --override-kv, ...) append a real entry after the
|
||||
// sentinel: a GGML_ASSERT crash for tensor_buft_overrides, a silent drop for
|
||||
// kv_overrides. Double-termination is harmless (the while is a no-op if the
|
||||
// passthrough parse already padded; an extra trailing null is ignored).
|
||||
|
||||
if (!params.kv_overrides.empty()) {
|
||||
params.kv_overrides.emplace_back();
|
||||
params.kv_overrides.back().key[0] = 0;
|
||||
}
|
||||
|
||||
// tensor_buft_overrides sentinel termination (mirrors upstream common/arg.cpp).
|
||||
// Real entries are pushed during option parsing; here we pad/terminate so the
|
||||
// model loader sees back().pattern == nullptr (GGML_ASSERT at common.cpp:1543)
|
||||
// and so llama_params_fit has the placeholder slots it requires.
|
||||
{
|
||||
const size_t ntbo = llama_max_tensor_buft_overrides();
|
||||
while (params.tensor_buft_overrides.size() < ntbo) {
|
||||
params.tensor_buft_overrides.push_back({nullptr, nullptr});
|
||||
}
|
||||
}
|
||||
// Terminate the draft tensor_buft_overrides list with a sentinel, mirroring
|
||||
// the main-model handling above.
|
||||
if (!params.speculative.draft.tensor_buft_overrides.empty()) {
|
||||
params.speculative.draft.tensor_buft_overrides.push_back({nullptr, nullptr});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -14,6 +14,22 @@ mkdir -p $CURDIR/package/lib
|
||||
cp -avrf $CURDIR/llama-cpp-* $CURDIR/package/
|
||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Bundle the ggml shared backends produced by the CPU_ALL_VARIANTS build (libggml-base.so,
|
||||
# libggml.so, libllama.so and the per-microarch libggml-cpu-*.so), all into package/lib.
|
||||
#
|
||||
# Two distinct resolution mechanisms both land here:
|
||||
# - NEEDED deps (libggml-base/libggml/libllama): resolved by the dynamic linker via the
|
||||
# LD_LIBRARY_PATH=$CURDIR/lib that run.sh exports.
|
||||
# - The per-microarch libggml-cpu-*.so are NOT linked; ggml *discovers* them at runtime by
|
||||
# scanning the executable's own directory (readlink /proc/self/exe). run.sh launches via
|
||||
# the bundled $CURDIR/lib/ld.so, so /proc/self/exe -> .../lib/ld.so and ggml scans lib/.
|
||||
# That is why the variants must sit in lib/ (next to ld.so), not just on the link path.
|
||||
# No-op on builds (arm64/darwin) that don't produce the all-variants set.
|
||||
if [ -d "$CURDIR/ggml-shared-libs" ]; then
|
||||
echo "Bundling ggml shared backends (CPU_ALL_VARIANTS)..."
|
||||
cp -avf $CURDIR/ggml-shared-libs/*.so* $CURDIR/package/lib/
|
||||
fi
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
# x86_64 architecture
|
||||
|
||||
@@ -12,26 +12,12 @@ grep -e "flags" /proc/cpuinfo | head -1
|
||||
|
||||
BINARY=llama-cpp-fallback
|
||||
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx ]; then
|
||||
BINARY=llama-cpp-avx
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx2 ]; then
|
||||
BINARY=llama-cpp-avx2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/llama-cpp-avx512 ]; then
|
||||
BINARY=llama-cpp-avx512
|
||||
fi
|
||||
# CPU images (x86, arm64, darwin) ship a single llama-cpp-cpu-all built with ggml
|
||||
# CPU_ALL_VARIANTS: ggml's backend registry dlopens the best libggml-cpu-*.so for this
|
||||
# host, so no shell-side AVX probing. GPU images (cublas/sycl/vulkan/hipblas) ship only
|
||||
# llama-cpp-fallback (the accelerator does the compute), so fall back to it when absent.
|
||||
if [ -e $CURDIR/llama-cpp-cpu-all ]; then
|
||||
BINARY=llama-cpp-cpu-all
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
|
||||
|
||||
@@ -51,6 +51,14 @@ add_library(hw_grpc_proto STATIC
|
||||
${HW_GRPC_SRCS} ${HW_GRPC_HDRS}
|
||||
${HW_PROTO_SRCS} ${HW_PROTO_HDRS})
|
||||
target_include_directories(hw_grpc_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
|
||||
# The generated proto/grpc sources include protobuf and grpc++ headers, so this
|
||||
# library must see their include dirs. Linking the imported targets propagates
|
||||
# them. On Linux the apt headers live in /usr/include (default search path) so
|
||||
# this was a no-op; on macOS the Homebrew headers are under /opt/homebrew and
|
||||
# would otherwise be missed (runtime_version.h not found).
|
||||
target_link_libraries(hw_grpc_proto PUBLIC
|
||||
protobuf::libprotobuf
|
||||
gRPC::grpc++)
|
||||
|
||||
# Build only the pf static lib (+ ggml) from the engine tree — no CLI/bench/tests.
|
||||
# PF_VULKAN is honored when passed on the cmake command line (it lands in the
|
||||
|
||||
@@ -2,7 +2,13 @@
|
||||
# Entry point for the privacy-filter backend image / BACKEND_BINARY mode.
|
||||
set -e
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
export LD_LIBRARY_PATH="$CURDIR/lib:$LD_LIBRARY_PATH"
|
||||
# macOS has no bundled ld.so; the darwin package ships only dylibs under lib/,
|
||||
# resolved via DYLD_LIBRARY_PATH (the ld.so branch below is skipped there).
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH="$CURDIR/lib:$DYLD_LIBRARY_PATH"
|
||||
else
|
||||
export LD_LIBRARY_PATH="$CURDIR/lib:$LD_LIBRARY_PATH"
|
||||
fi
|
||||
if [ -f "$CURDIR/lib/ld.so" ]; then
|
||||
exec "$CURDIR/lib/ld.so" "$CURDIR/grpc-server" "$@"
|
||||
fi
|
||||
|
||||
@@ -65,6 +65,29 @@ turboquant-avx:
|
||||
turboquant-fallback:
|
||||
$(call turboquant-build,fallback,-DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off,--target grpc-server)
|
||||
|
||||
# Single-build CPU backend via ggml CPU_ALL_VARIANTS (mirrors llama-cpp-cpu-all).
|
||||
# turboquant reuses backend/cpp/llama-cpp's CMakeLists.txt (hw_grpc_proto STATIC) and
|
||||
# Makefile (SHARED_LIBS make-var + EXTRA_CMAKE_ARGS), so this passes the same overrides
|
||||
# through to the copied build: SHARED_LIBS=ON, the DL flags, and --target ggml (which
|
||||
# pulls in the per-microarch libggml-cpu-*.so via ggml's add_dependencies). The .so set
|
||||
# is collected for package.sh to bundle into package/lib.
|
||||
turboquant-cpu-all:
|
||||
rm -rf $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build
|
||||
cp -rf $(LLAMA_CPP_DIR) $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build purge
|
||||
bash $(CURRENT_MAKEFILE_DIR)/patch-grpc-server.sh $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/grpc-server.cpp
|
||||
$(info $(GREEN)I turboquant build info:cpu-all-variants$(RESET))
|
||||
LLAMA_REPO=$(LLAMA_REPO) LLAMA_VERSION=$(TURBOQUANT_VERSION) \
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build llama.cpp
|
||||
bash $(CURRENT_MAKEFILE_DIR)/apply-patches.sh $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/llama.cpp $(PATCHES_DIR)
|
||||
SHARED_LIBS=ON EXTRA_CMAKE_ARGS="-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON" TARGET="--target grpc-server --target ggml" \
|
||||
LLAMA_REPO=$(LLAMA_REPO) LLAMA_VERSION=$(TURBOQUANT_VERSION) \
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/grpc-server turboquant-cpu-all
|
||||
rm -rf ggml-shared-libs && mkdir -p ggml-shared-libs
|
||||
find $(CURRENT_MAKEFILE_DIR)/../turboquant-cpu-all-build/llama.cpp/build \( -name '*.so*' -o -name '*.dylib' \) -exec cp -av {} ggml-shared-libs/ \;
|
||||
@echo "Collected ggml shared backends:" && ls -la ggml-shared-libs/
|
||||
|
||||
turboquant-grpc:
|
||||
$(call turboquant-build,grpc,-DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off,--target grpc-server --target rpc-server)
|
||||
|
||||
|
||||
@@ -14,6 +14,15 @@ mkdir -p $CURDIR/package/lib
|
||||
cp -avrf $CURDIR/turboquant-* $CURDIR/package/
|
||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Bundle the ggml shared backends from the CPU_ALL_VARIANTS build into package/lib. ggml
|
||||
# discovers the per-microarch libggml-cpu-*.so by scanning the executable directory, which
|
||||
# (via the bundled lib/ld.so that run.sh launches through) resolves to lib/. See the
|
||||
# matching comment in backend/cpp/llama-cpp/package.sh. No-op on the fallback/ROCm builds.
|
||||
if [ -d "$CURDIR/ggml-shared-libs" ]; then
|
||||
echo "Bundling ggml shared backends (CPU_ALL_VARIANTS)..."
|
||||
cp -avf $CURDIR/ggml-shared-libs/*.so* $CURDIR/package/lib/
|
||||
fi
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
# x86_64 architecture
|
||||
|
||||
@@ -12,26 +12,11 @@ grep -e "flags" /proc/cpuinfo | head -1
|
||||
|
||||
BINARY=turboquant-fallback
|
||||
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/turboquant-avx ]; then
|
||||
BINARY=turboquant-avx
|
||||
fi
|
||||
fi
|
||||
|
||||
if grep -q -e "\savx2\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX2 found OK"
|
||||
if [ -e $CURDIR/turboquant-avx2 ]; then
|
||||
BINARY=turboquant-avx2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check avx 512
|
||||
if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX512F found OK"
|
||||
if [ -e $CURDIR/turboquant-avx512 ]; then
|
||||
BINARY=turboquant-avx512
|
||||
fi
|
||||
# x86/arm64 ship a single turboquant-cpu-all built with ggml CPU_ALL_VARIANTS: ggml's
|
||||
# backend registry dlopens the best libggml-cpu-*.so for this host, so no shell-side
|
||||
# probing. ROCm ships only turboquant-fallback, so fall back to it when cpu-all is absent.
|
||||
if [ -e $CURDIR/turboquant-cpu-all ]; then
|
||||
BINARY=turboquant-cpu-all
|
||||
fi
|
||||
|
||||
if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
|
||||
|
||||
@@ -117,7 +117,8 @@ libgoacestepcpp-custom: CMakeLists.txt cpp/goacestepcpp.cpp cpp/goacestepcpp.h
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) --target goacestepcpp && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/libgoacestepcpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libgoacestepcpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
test: acestep-cpp
|
||||
@echo "Running acestep-cpp tests..."
|
||||
|
||||
@@ -4,6 +4,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -22,7 +23,11 @@ func main() {
|
||||
// Get library name from environment variable, default to fallback
|
||||
libName := os.Getenv("ACESTEP_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libgoacestepcpp-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libgoacestepcpp-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libgoacestepcpp-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -13,6 +13,7 @@ mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/acestep-cpp $CURDIR/package/
|
||||
cp -fv $CURDIR/libgoacestepcpp-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libgoacestepcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
|
||||
@@ -12,9 +12,19 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single library variant (Metal or Accelerate). The goacestepcpp
|
||||
# target is built as a CMake MODULE, which emits a .dylib for a SHARED
|
||||
# build but a .so for a MODULE build on Apple, so prefer .dylib and fall
|
||||
# back to .so.
|
||||
LIBRARY="$CURDIR/libgoacestepcpp-fallback.dylib"
|
||||
if [ ! -e "$LIBRARY" ]; then
|
||||
LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
|
||||
fi
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/libgoacestepcpp-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/libgoacestepcpp-avx.so ]; then
|
||||
@@ -36,9 +46,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
LIBRARY="$CURDIR/libgoacestepcpp-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export ACESTEP_LIBRARY=$LIBRARY
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
|
||||
@@ -57,6 +57,7 @@ libced.so: sources/ced.cpp
|
||||
cmake -B sources/ced.cpp/build-shared -S sources/ced.cpp $(CMAKE_ARGS)
|
||||
cmake --build sources/ced.cpp/build-shared --config Release -j$(JOBS)
|
||||
cp -fv sources/ced.cpp/build-shared/libced.so* ./ 2>/dev/null || true
|
||||
cp -fv sources/ced.cpp/build-shared/libced.dylib ./ 2>/dev/null || true
|
||||
cp -fv sources/ced.cpp/include/ced_capi.h ./
|
||||
|
||||
ced-grpc: libced.so main.go goced.go
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -27,7 +28,11 @@ type libFunc struct {
|
||||
func main() {
|
||||
libName := os.Getenv("CED_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "libced.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "libced.dylib"
|
||||
} else {
|
||||
libName = "libced.so"
|
||||
}
|
||||
}
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
if err != nil {
|
||||
|
||||
@@ -15,10 +15,12 @@ mkdir -p "$CURDIR/package/lib"
|
||||
cp -avf "$CURDIR/ced-grpc" "$CURDIR/package/"
|
||||
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
|
||||
|
||||
cp -avf "$CURDIR"/libced.so* "$CURDIR/package/lib/" 2>/dev/null || {
|
||||
echo "ERROR: libced.so not found in $CURDIR, run 'make' first" >&2
|
||||
cp -avf "$CURDIR"/libced.so* "$CURDIR/package/lib/" 2>/dev/null || true
|
||||
cp -avf "$CURDIR"/libced.dylib "$CURDIR/package/lib/" 2>/dev/null || true
|
||||
if ! ls "$CURDIR"/package/lib/libced.* >/dev/null 2>&1; then
|
||||
echo "ERROR: libced shared library not found in $CURDIR, run 'make' first" >&2
|
||||
exit 1
|
||||
}
|
||||
fi
|
||||
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
||||
|
||||
@@ -3,7 +3,12 @@ set -e
|
||||
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${DYLD_LIBRARY_PATH:-}"
|
||||
export CED_LIBRARY="$CURDIR/lib/libced.dylib"
|
||||
else
|
||||
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
|
||||
fi
|
||||
|
||||
# If a self-contained ld.so was packaged, route through it so the packaged
|
||||
# libc / libstdc++ are used instead of the host's (matches the sibling backends).
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# CrispASR version (release tag)
|
||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||
CRISPASR_VERSION?=63b57289255267edf66e43e33bc3911e04a2e92d
|
||||
CRISPASR_VERSION?=96b2a6ee31d30389fed8a7ef1a54239b75231ddc
|
||||
SO_TARGET?=libgocrispasr.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
@@ -75,7 +75,8 @@ UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = libgocrispasr-avx.so libgocrispasr-avx2.so libgocrispasr-avx512.so libgocrispasr-fallback.so
|
||||
else
|
||||
VARIANT_TARGETS = libgocrispasr-fallback.so
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
|
||||
VARIANT_TARGETS = libgocrispasr-fallback.dylib
|
||||
endif
|
||||
|
||||
crispasr: main.go gocrispasr.go $(VARIANT_TARGETS)
|
||||
@@ -87,7 +88,7 @@ package: crispasr
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf libgocrispasr*.so package sources/CrispASR crispasr
|
||||
rm -rf libgocrispasr*.so libgocrispasr*.dylib package sources/CrispASR crispasr
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
@@ -118,13 +119,21 @@ libgocrispasr-fallback.so: sources/CrispASR
|
||||
SO_TARGET=libgocrispasr-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom
|
||||
rm -rfv build*
|
||||
|
||||
# Build fallback variant as a dylib (Darwin)
|
||||
libgocrispasr-fallback.dylib: sources/CrispASR
|
||||
$(MAKE) purge
|
||||
$(info ${GREEN}I crispasr build info:fallback (dylib)${RESET})
|
||||
SO_TARGET=libgocrispasr-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgocrispasr-custom
|
||||
rm -rfv build*
|
||||
|
||||
libgocrispasr-custom: CMakeLists.txt cpp/crispasr_shim.cpp cpp/crispasr_shim.h
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
cd build-$(SO_TARGET) && \
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libgocrispasr.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/libgocrispasr.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libgocrispasr.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
test: crispasr
|
||||
CGO_ENABLED=0 $(GOCMD) test -v ./...
|
||||
|
||||
@@ -4,6 +4,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -21,7 +22,11 @@ type LibFuncs struct {
|
||||
func main() {
|
||||
libName := os.Getenv("CRISPASR_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libgocrispasr-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libgocrispasr-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libgocrispasr-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/crispasr $CURDIR/package/
|
||||
cp -fv $CURDIR/libgocrispasr-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libgocrispasr-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/libgocrispasr-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
|
||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/libgocrispasr-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single dylib variant (Metal or Accelerate)
|
||||
LIBRARY="$CURDIR/libgocrispasr-fallback.dylib"
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/libgocrispasr-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/libgocrispasr-avx.so ]; then
|
||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
LIBRARY="$CURDIR/libgocrispasr-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export CRISPASR_LIBRARY=$LIBRARY
|
||||
|
||||
# Point piper's espeak-ng phonemizer at the bundled voice data. The variable
|
||||
|
||||
@@ -40,6 +40,8 @@ else ifeq ($(BUILD_TYPE),hipblas)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON -DDA_GGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
# macOS/Metal: built + published as an OCI image by CI (includeDarwin in
|
||||
# .github/backend-matrix.yml) so Apple Silicon users can install this backend.
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
else
|
||||
@@ -77,7 +79,7 @@ ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = libdepthanythingcpp-avx.so libdepthanythingcpp-avx2.so libdepthanythingcpp-avx512.so libdepthanythingcpp-fallback.so
|
||||
else
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||
VARIANT_TARGETS = libdepthanythingcpp-fallback.so
|
||||
VARIANT_TARGETS = libdepthanythingcpp-fallback.dylib
|
||||
endif
|
||||
|
||||
depth-anything-cpp: main.go godepthanythingcpp.go $(VARIANT_TARGETS)
|
||||
@@ -89,7 +91,7 @@ package: depth-anything-cpp
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf libdepthanythingcpp*.so depth-anything-cpp package sources
|
||||
rm -rf libdepthanythingcpp*.so libdepthanythingcpp*.dylib depth-anything-cpp package sources
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
@@ -116,11 +118,19 @@ libdepthanythingcpp-avx512.so: sources/depth-anything.cpp
|
||||
endif
|
||||
|
||||
# Build fallback variant (all platforms)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
libdepthanythingcpp-fallback.dylib: sources/depth-anything.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
|
||||
rm -rfv build-$@
|
||||
else
|
||||
libdepthanythingcpp-fallback.so: sources/depth-anything.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I depth-anything-cpp build info:fallback${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libdepthanythingcpp-custom
|
||||
rm -rfv build-$@
|
||||
endif
|
||||
|
||||
libdepthanythingcpp-custom: CMakeLists.txt
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
@@ -128,7 +138,8 @@ libdepthanythingcpp-custom: CMakeLists.txt
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/libdepthanything.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libdepthanything.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
all: depth-anything-cpp package
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -27,7 +28,11 @@ func main() {
|
||||
// Get library name from environment variable, default to fallback
|
||||
libName := os.Getenv("DEPTHANYTHING_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libdepthanythingcpp-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libdepthanythingcpp-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libdepthanythingcpp-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
# Create lib directory
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/libdepthanythingcpp-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libdepthanythingcpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/libdepthanythingcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -avf $CURDIR/depth-anything-cpp $CURDIR/package/
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
|
||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single dylib variant (Metal or Accelerate)
|
||||
LIBRARY="$CURDIR/libdepthanythingcpp-fallback.dylib"
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/libdepthanythingcpp-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/libdepthanythingcpp-avx.so ]; then
|
||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
LIBRARY="$CURDIR/libdepthanythingcpp-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export DEPTHANYTHING_LIBRARY=$LIBRARY
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
|
||||
@@ -32,6 +32,8 @@ endif
|
||||
ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON -DLOCALVQE_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
# Apple Silicon: CPU-only (no Metal upstream); built + published as an arm64
|
||||
# image by CI (includeDarwin in .github/backend-matrix.yml) for macOS install.
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
endif
|
||||
|
||||
@@ -67,8 +69,9 @@ $(LIB_SENTINEL): sources/LocalVQE
|
||||
# that the loader picks at runtime. We must build every target — the
|
||||
# default `--target localvqe_shared` drops these. CMAKE_LIBRARY_OUTPUT_DIRECTORY
|
||||
# routes all of them into build/bin; copy them out next to the binary.
|
||||
cp -P build/bin/liblocalvqe.so* . 2>/dev/null || cp -P build/liblocalvqe.so* .
|
||||
cp -P build/bin/liblocalvqe.so* . 2>/dev/null || cp -P build/bin/liblocalvqe.dylib . 2>/dev/null || cp -P build/liblocalvqe.so* . 2>/dev/null || cp -P build/liblocalvqe.dylib .
|
||||
cp -P build/bin/libggml*.so* . 2>/dev/null || true
|
||||
cp -P build/bin/libggml*.dylib . 2>/dev/null || true
|
||||
touch $(LIB_SENTINEL)
|
||||
|
||||
liblocalvqe.so: $(LIB_SENTINEL)
|
||||
|
||||
@@ -4,6 +4,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -21,7 +22,11 @@ type LibFuncs struct {
|
||||
func main() {
|
||||
libName := os.Getenv("LOCALVQE_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./liblocalvqe.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./liblocalvqe.dylib"
|
||||
} else {
|
||||
libName = "./liblocalvqe.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -15,7 +15,9 @@ cp -avf $CURDIR/localvqe $CURDIR/package/
|
||||
# liblocalvqe.so* (with SOVERSION symlinks) and the libggml-*.so runtime
|
||||
# variants — LocalVQE picks the matching CPU variant at load time.
|
||||
cp -P $CURDIR/liblocalvqe.so* $CURDIR/package/ 2>/dev/null || true
|
||||
cp -P $CURDIR/liblocalvqe.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -P $CURDIR/libggml*.so* $CURDIR/package/ 2>/dev/null || true
|
||||
cp -P $CURDIR/libggml*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
|
||||
@@ -10,8 +10,19 @@ CURDIR=$(dirname "$(realpath $0)")
|
||||
# exec'ing the binary.
|
||||
cd "$CURDIR"
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: LocalVQE is built as a SHARED library, so dyld needs the .dylib +
|
||||
# DYLD_LIBRARY_PATH. Prefer .dylib and fall back to .so just in case.
|
||||
export DYLD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.dylib
|
||||
if [ ! -e "$LOCALVQE_LIBRARY" ]; then
|
||||
LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
|
||||
fi
|
||||
export LOCALVQE_LIBRARY
|
||||
else
|
||||
export LD_LIBRARY_PATH=$CURDIR:$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export LOCALVQE_LIBRARY=$CURDIR/liblocalvqe.so
|
||||
fi
|
||||
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
|
||||
@@ -33,6 +33,8 @@ else ifeq ($(BUILD_TYPE),hipblas)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON -DLA_GGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
# macOS/Metal: built + published as an OCI image by CI (includeDarwin in
|
||||
# .github/backend-matrix.yml) so Apple Silicon users can install this backend.
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
else
|
||||
@@ -70,7 +72,7 @@ ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = liblocateanythingcpp-avx.so liblocateanythingcpp-avx2.so liblocateanythingcpp-avx512.so liblocateanythingcpp-fallback.so
|
||||
else
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||
VARIANT_TARGETS = liblocateanythingcpp-fallback.so
|
||||
VARIANT_TARGETS = liblocateanythingcpp-fallback.dylib
|
||||
endif
|
||||
|
||||
locate-anything-cpp: main.go golocateanythingcpp.go $(VARIANT_TARGETS)
|
||||
@@ -82,7 +84,7 @@ package: locate-anything-cpp
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf liblocateanythingcpp*.so locate-anything-cpp package sources
|
||||
rm -rf liblocateanythingcpp*.so liblocateanythingcpp*.dylib locate-anything-cpp package sources
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
@@ -109,11 +111,19 @@ liblocateanythingcpp-avx512.so: sources/locate-anything.cpp
|
||||
endif
|
||||
|
||||
# Build fallback variant (all platforms)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
liblocateanythingcpp-fallback.dylib: sources/locate-anything.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I locate-anything-cpp build info:fallback${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom
|
||||
rm -rfv build-$@
|
||||
else
|
||||
liblocateanythingcpp-fallback.so: sources/locate-anything.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I locate-anything-cpp build info:fallback${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) liblocateanythingcpp-custom
|
||||
rm -rfv build-$@
|
||||
endif
|
||||
|
||||
liblocateanythingcpp-custom: CMakeLists.txt
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
@@ -121,7 +131,8 @@ liblocateanythingcpp-custom: CMakeLists.txt
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/liblocateanythingcpp.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/liblocateanythingcpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/liblocateanythingcpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
all: locate-anything-cpp package
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -27,7 +28,11 @@ func main() {
|
||||
// Get library name from environment variable, default to fallback
|
||||
libName := os.Getenv("LOCATEANYTHING_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./liblocateanythingcpp-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./liblocateanythingcpp-fallback.dylib"
|
||||
} else {
|
||||
libName = "./liblocateanythingcpp-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
# Create lib directory
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/liblocateanythingcpp-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/liblocateanythingcpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/liblocateanythingcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -avf $CURDIR/locate-anything-cpp $CURDIR/package/
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
|
||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single dylib variant (Metal or Accelerate)
|
||||
LIBRARY="$CURDIR/liblocateanythingcpp-fallback.dylib"
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/liblocateanythingcpp-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/liblocateanythingcpp-avx.so ]; then
|
||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
LIBRARY="$CURDIR/liblocateanythingcpp-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export LOCATEANYTHING_LIBRARY=$LIBRARY
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# omnivoice.cpp version
|
||||
OMNIVOICE_REPO?=https://github.com/ServeurpersoCom/omnivoice.cpp
|
||||
OMNIVOICE_VERSION?=96d30169afd5e6bb3fd6a0e9be0eb505bfe81fcd
|
||||
OMNIVOICE_VERSION?=0f37401bebe9b20c0160a888e592108fc1d17607
|
||||
SO_TARGET?=libgomnivoicecpp.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
@@ -65,7 +65,8 @@ UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = libgomnivoicecpp-avx.so libgomnivoicecpp-avx2.so libgomnivoicecpp-avx512.so libgomnivoicecpp-fallback.so
|
||||
else
|
||||
VARIANT_TARGETS = libgomnivoicecpp-fallback.so
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
|
||||
VARIANT_TARGETS = libgomnivoicecpp-fallback.dylib
|
||||
endif
|
||||
|
||||
omnivoice-cpp: main.go gomnivoicecpp.go $(VARIANT_TARGETS)
|
||||
@@ -77,7 +78,7 @@ package: omnivoice-cpp
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf libgomnivoicecpp*.so package sources/omnivoice.cpp omnivoice-cpp
|
||||
rm -rf libgomnivoicecpp*.so libgomnivoicecpp*.dylib package sources/omnivoice.cpp omnivoice-cpp
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
@@ -106,13 +107,20 @@ libgomnivoicecpp-fallback.so: sources/omnivoice.cpp
|
||||
SO_TARGET=libgomnivoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom
|
||||
rm -rf build-libgomnivoicecpp-fallback.so
|
||||
|
||||
# Build fallback variant as a dylib (Darwin)
|
||||
libgomnivoicecpp-fallback.dylib: sources/omnivoice.cpp
|
||||
$(info ${GREEN}I omnivoice-cpp build info:fallback (dylib)${RESET})
|
||||
SO_TARGET=libgomnivoicecpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgomnivoicecpp-custom
|
||||
rm -rf build-libgomnivoicecpp-fallback.dylib
|
||||
|
||||
libgomnivoicecpp-custom: CMakeLists.txt cpp/gomnivoicecpp.cpp cpp/gomnivoicecpp.h
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
cd build-$(SO_TARGET) && \
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) --target gomnivoicecpp && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libgomnivoicecpp.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/libgomnivoicecpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libgomnivoicecpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
test: omnivoice-cpp
|
||||
@echo "Running omnivoice-cpp tests..."
|
||||
|
||||
@@ -4,6 +4,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -21,7 +22,11 @@ type LibFuncs struct {
|
||||
func main() {
|
||||
libName := os.Getenv("OMNIVOICE_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libgomnivoicecpp-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libgomnivoicecpp-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libgomnivoicecpp-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/omnivoice-cpp $CURDIR/package/
|
||||
cp -fv $CURDIR/libgomnivoicecpp-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libgomnivoicecpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/libgomnivoicecpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
|
||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/libgomnivoicecpp-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single dylib variant (Metal or Accelerate)
|
||||
LIBRARY="$CURDIR/libgomnivoicecpp-fallback.dylib"
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/libgomnivoicecpp-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/libgomnivoicecpp-avx.so ]; then
|
||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
LIBRARY="$CURDIR/libgomnivoicecpp-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export OMNIVOICE_LIBRARY=$LIBRARY
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# parakeet-cpp backend Makefile.
|
||||
#
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
||||
# (.github/bump_deps.sh) can find and update it - matches the
|
||||
# whisper.cpp / ds4 / vibevoice-cpp convention.
|
||||
#
|
||||
@@ -15,7 +15,7 @@
|
||||
# That's what the L0 smoke test uses. The default target below does the
|
||||
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
|
||||
|
||||
PARAKEET_VERSION?=db755a78d39f789bb7d4e3935158a9e8105dbe36
|
||||
PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
||||
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
|
||||
|
||||
GOCMD?=go
|
||||
@@ -74,6 +74,7 @@ libparakeet.so: sources/parakeet.cpp
|
||||
cmake -B sources/parakeet.cpp/build-shared -S sources/parakeet.cpp $(CMAKE_ARGS)
|
||||
cmake --build sources/parakeet.cpp/build-shared --config Release -j$(JOBS)
|
||||
cp -fv sources/parakeet.cpp/build-shared/libparakeet.so* ./ 2>/dev/null || true
|
||||
cp -fv sources/parakeet.cpp/build-shared/libparakeet.dylib ./ 2>/dev/null || true
|
||||
cp -fv sources/parakeet.cpp/include/parakeet_capi.h ./
|
||||
|
||||
parakeet-cpp-grpc: libparakeet.so main.go goparakeetcpp.go
|
||||
|
||||
@@ -2,15 +2,17 @@ package main
|
||||
|
||||
// Started internally by LocalAI - one gRPC server per loaded model.
|
||||
//
|
||||
// Loads libparakeet.so via purego and registers the flat C-API entry
|
||||
// points declared in parakeet_capi.h. The library name can be overridden
|
||||
// with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY / VIBEVOICECPP_LIBRARY
|
||||
// convention in the sibling backends); the default looks for the .so next
|
||||
// to this binary.
|
||||
// Loads the parakeet shared library via purego and registers the flat
|
||||
// C-API entry points declared in parakeet_capi.h. The library name can be
|
||||
// overridden with PARAKEET_LIBRARY (mirrors the WHISPER_LIBRARY /
|
||||
// VIBEVOICECPP_LIBRARY convention in the sibling backends); the default
|
||||
// looks next to this binary for libparakeet.so on Linux and
|
||||
// libparakeet.dylib on macOS.
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -28,7 +30,11 @@ type LibFuncs struct {
|
||||
func main() {
|
||||
libName := os.Getenv("PARAKEET_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "libparakeet.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "libparakeet.dylib"
|
||||
} else {
|
||||
libName = "libparakeet.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -16,12 +16,15 @@ mkdir -p "$CURDIR/package/lib"
|
||||
cp -avf "$CURDIR/parakeet-cpp-grpc" "$CURDIR/package/"
|
||||
cp -avf "$CURDIR/run.sh" "$CURDIR/package/"
|
||||
|
||||
# libparakeet.so + any soname symlinks (libparakeet.so.X[.Y]). purego.Dlopen
|
||||
# resolves it via LD_LIBRARY_PATH, which run.sh points at lib/.
|
||||
cp -avf "$CURDIR"/libparakeet.so* "$CURDIR/package/lib/" 2>/dev/null || {
|
||||
echo "ERROR: libparakeet.so not found in $CURDIR, run 'make' first" >&2
|
||||
# libparakeet shared lib + any soname symlinks. On Linux this is
|
||||
# libparakeet.so[.X.Y]; on macOS it is libparakeet.dylib. purego.Dlopen
|
||||
# resolves it via the *_LIBRARY_PATH that run.sh points at lib/.
|
||||
cp -avf "$CURDIR"/libparakeet.so* "$CURDIR/package/lib/" 2>/dev/null || true
|
||||
cp -avf "$CURDIR"/libparakeet.dylib "$CURDIR/package/lib/" 2>/dev/null || true
|
||||
if ! ls "$CURDIR"/package/lib/libparakeet.* >/dev/null 2>&1; then
|
||||
echo "ERROR: libparakeet shared library not found in $CURDIR, run 'make' first" >&2
|
||||
exit 1
|
||||
}
|
||||
fi
|
||||
|
||||
# Detect architecture and copy the core runtime libs libparakeet.so links
|
||||
# against, plus the matching dynamic loader as lib/ld.so.
|
||||
@@ -48,7 +51,7 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
||||
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 "$CURDIR/package/lib/librt.so.1"
|
||||
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 "$CURDIR/package/lib/libpthread.so.0"
|
||||
elif [ "$(uname -s)" = "Darwin" ]; then
|
||||
echo "Detected Darwin"
|
||||
echo "Detected Darwin — system frameworks linked dynamically, no bundled libs needed"
|
||||
else
|
||||
echo "Error: Could not detect architecture"
|
||||
exit 1
|
||||
|
||||
@@ -3,11 +3,17 @@ set -e
|
||||
|
||||
CURDIR=$(dirname "$(realpath "$0")")
|
||||
|
||||
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${DYLD_LIBRARY_PATH:-}"
|
||||
export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.dylib"
|
||||
else
|
||||
export LD_LIBRARY_PATH="$CURDIR/lib:$CURDIR:${LD_LIBRARY_PATH:-}"
|
||||
export PARAKEET_LIBRARY="$CURDIR/lib/libparakeet.so"
|
||||
fi
|
||||
|
||||
# If a self-contained ld.so was packaged, route through it so the
|
||||
# packaged libc / libstdc++ are used instead of the host's (matches the
|
||||
# whisper backend's runtime layout).
|
||||
# whisper backend's runtime layout). Linux only.
|
||||
if [ -f "$CURDIR/lib/ld.so" ]; then
|
||||
echo "Using lib/ld.so"
|
||||
exec "$CURDIR/lib/ld.so" "$CURDIR/parakeet-cpp-grpc" "$@"
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# qwentts.cpp version
|
||||
QWEN3TTS_REPO?=https://github.com/ServeurpersoCom/qwentts.cpp
|
||||
QWEN3TTS_CPP_VERSION?=4536dcdce27c3764a93a06d6bf64026b124962f5
|
||||
QWEN3TTS_CPP_VERSION?=9dbe7ea26a01b30fccb117ae5e86807c1dc23d42
|
||||
SO_TARGET?=libgoqwen3ttscpp.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
@@ -65,8 +65,8 @@ UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = libgoqwen3ttscpp-avx.so libgoqwen3ttscpp-avx2.so libgoqwen3ttscpp-avx512.so libgoqwen3ttscpp-fallback.so
|
||||
else
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||
VARIANT_TARGETS = libgoqwen3ttscpp-fallback.so
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
|
||||
VARIANT_TARGETS = libgoqwen3ttscpp-fallback.dylib
|
||||
endif
|
||||
|
||||
qwen3-tts-cpp: main.go goqwen3ttscpp.go $(VARIANT_TARGETS)
|
||||
@@ -78,7 +78,7 @@ package: qwen3-tts-cpp
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf libgoqwen3ttscpp*.so package sources/qwentts.cpp qwen3-tts-cpp
|
||||
rm -rf libgoqwen3ttscpp*.so libgoqwen3ttscpp*.dylib package sources/qwentts.cpp qwen3-tts-cpp
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
@@ -110,13 +110,20 @@ libgoqwen3ttscpp-fallback.so: sources/qwentts.cpp
|
||||
SO_TARGET=libgoqwen3ttscpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom
|
||||
rm -rf build-libgoqwen3ttscpp-fallback.so
|
||||
|
||||
# Build fallback variant as a dylib (Darwin)
|
||||
libgoqwen3ttscpp-fallback.dylib: sources/qwentts.cpp
|
||||
$(info ${GREEN}I qwen3-tts-cpp build info:fallback (dylib)${RESET})
|
||||
SO_TARGET=libgoqwen3ttscpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgoqwen3ttscpp-custom
|
||||
rm -rf build-libgoqwen3ttscpp-fallback.dylib
|
||||
|
||||
libgoqwen3ttscpp-custom: CMakeLists.txt cpp/goqwen3ttscpp.cpp cpp/goqwen3ttscpp.h
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
cd build-$(SO_TARGET) && \
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) --target goqwen3ttscpp && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libgoqwen3ttscpp.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/libgoqwen3ttscpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libgoqwen3ttscpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
test: qwen3-tts-cpp
|
||||
@echo "Running qwen3-tts-cpp tests..."
|
||||
|
||||
@@ -4,6 +4,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -21,7 +22,11 @@ type LibFuncs struct {
|
||||
func main() {
|
||||
libName := os.Getenv("QWEN3TTS_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libgoqwen3ttscpp-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libgoqwen3ttscpp-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libgoqwen3ttscpp-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/qwen3-tts-cpp $CURDIR/package/
|
||||
cp -fv $CURDIR/libgoqwen3ttscpp-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libgoqwen3ttscpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/libgoqwen3ttscpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
|
||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single dylib variant (Metal or Accelerate)
|
||||
LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.dylib"
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/libgoqwen3ttscpp-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/libgoqwen3ttscpp-avx.so ]; then
|
||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
LIBRARY="$CURDIR/libgoqwen3ttscpp-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export QWEN3TTS_LIBRARY=$LIBRARY
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
|
||||
@@ -34,6 +34,8 @@ else ifeq ($(BUILD_TYPE),hipblas)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON -DRFDETR_GGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
# macOS/Metal: built + published as an OCI image by CI (includeDarwin in
|
||||
# .github/backend-matrix.yml) so Apple Silicon users can install this backend.
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
else
|
||||
@@ -71,7 +73,7 @@ ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = librfdetrcpp-avx.so librfdetrcpp-avx2.so librfdetrcpp-avx512.so librfdetrcpp-fallback.so
|
||||
else
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||
VARIANT_TARGETS = librfdetrcpp-fallback.so
|
||||
VARIANT_TARGETS = librfdetrcpp-fallback.dylib
|
||||
endif
|
||||
|
||||
rfdetr-cpp: main.go gorfdetrcpp.go $(VARIANT_TARGETS)
|
||||
@@ -83,7 +85,7 @@ package: rfdetr-cpp
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf librfdetrcpp*.so rfdetr-cpp package sources
|
||||
rm -rf librfdetrcpp*.so librfdetrcpp*.dylib rfdetr-cpp package sources
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
@@ -110,11 +112,19 @@ librfdetrcpp-avx512.so: sources/rt-detr.cpp
|
||||
endif
|
||||
|
||||
# Build fallback variant (all platforms)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
librfdetrcpp-fallback.dylib: sources/rt-detr.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I rfdetr-cpp build info:fallback${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom
|
||||
rm -rfv build-$@
|
||||
else
|
||||
librfdetrcpp-fallback.so: sources/rt-detr.cpp
|
||||
rm -rfv build-$@
|
||||
$(info ${GREEN}I rfdetr-cpp build info:fallback${RESET})
|
||||
SO_TARGET=$@ CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) librfdetrcpp-custom
|
||||
rm -rfv build-$@
|
||||
endif
|
||||
|
||||
librfdetrcpp-custom: CMakeLists.txt
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
@@ -122,7 +132,8 @@ librfdetrcpp-custom: CMakeLists.txt
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/librfdetrcpp.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/librfdetrcpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/librfdetrcpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
all: rfdetr-cpp package
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -27,7 +28,11 @@ func main() {
|
||||
// Get library name from environment variable, default to fallback
|
||||
libName := os.Getenv("RFDETR_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./librfdetrcpp-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./librfdetrcpp-fallback.dylib"
|
||||
} else {
|
||||
libName = "./librfdetrcpp-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
rfdetrLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
# Create lib directory
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/librfdetrcpp-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/librfdetrcpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/librfdetrcpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -avf $CURDIR/rfdetr-cpp $CURDIR/package/
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
|
||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/librfdetrcpp-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single dylib variant (Metal or Accelerate)
|
||||
LIBRARY="$CURDIR/librfdetrcpp-fallback.dylib"
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/librfdetrcpp-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/librfdetrcpp-avx.so ]; then
|
||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
LIBRARY="$CURDIR/librfdetrcpp-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export RFDETR_LIBRARY=$LIBRARY
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
|
||||
@@ -31,6 +31,8 @@ else ifeq ($(BUILD_TYPE),hipblas)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
# macOS/Metal: built + published as an OCI image by CI (includeDarwin in
|
||||
# .github/backend-matrix.yml) so Apple Silicon users can install this backend.
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
else
|
||||
@@ -66,7 +68,7 @@ ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = libgosam3-avx.so libgosam3-avx2.so libgosam3-avx512.so libgosam3-fallback.so
|
||||
else
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||
VARIANT_TARGETS = libgosam3-fallback.so
|
||||
VARIANT_TARGETS = libgosam3-fallback.dylib
|
||||
endif
|
||||
|
||||
sam3-cpp: main.go gosam3.go $(VARIANT_TARGETS)
|
||||
@@ -78,7 +80,7 @@ package: sam3-cpp
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf libgosam3*.so sam3-cpp package sources
|
||||
rm -rf libgosam3*.so libgosam3*.dylib sam3-cpp package sources
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
@@ -105,11 +107,19 @@ libgosam3-avx512.so: sources/sam3.cpp
|
||||
endif
|
||||
|
||||
# Build fallback variant (all platforms)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
libgosam3-fallback.dylib: sources/sam3.cpp
|
||||
$(MAKE) purge
|
||||
$(info ${GREEN}I sam3-cpp build info:fallback${RESET})
|
||||
SO_TARGET=libgosam3-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
|
||||
rm -rfv build*
|
||||
else
|
||||
libgosam3-fallback.so: sources/sam3.cpp
|
||||
$(MAKE) purge
|
||||
$(info ${GREEN}I sam3-cpp build info:fallback${RESET})
|
||||
SO_TARGET=libgosam3-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
|
||||
rm -rfv build*
|
||||
endif
|
||||
|
||||
libgosam3-custom: CMakeLists.txt cpp/gosam3.cpp cpp/gosam3.h
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
@@ -117,6 +127,7 @@ libgosam3-custom: CMakeLists.txt cpp/gosam3.cpp cpp/gosam3.h
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libgosam3.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/libgosam3.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libgosam3.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
all: sam3-cpp package
|
||||
|
||||
@@ -3,6 +3,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -21,7 +22,11 @@ func main() {
|
||||
// Get library name from environment variable, default to fallback
|
||||
libName := os.Getenv("SAM3_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libgosam3-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libgosam3-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libgosam3-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
gosamLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -10,7 +10,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
# Create lib directory
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/libgosam3-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libgosam3-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/libgosam3-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -avf $CURDIR/sam3-cpp $CURDIR/package/
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
|
||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/libgosam3-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single dylib variant (Metal or Accelerate)
|
||||
LIBRARY="$CURDIR/libgosam3-fallback.dylib"
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/libgosam3-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/libgosam3-avx.so ]; then
|
||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
LIBRARY="$CURDIR/libgosam3-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export SAM3_LIBRARY=$LIBRARY
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -238,11 +239,19 @@ func loadSherpaLibs() error {
|
||||
func loadSherpaLibsOnce() error {
|
||||
shimLib := os.Getenv("SHERPA_SHIM_LIBRARY")
|
||||
if shimLib == "" {
|
||||
shimLib = "libsherpa-shim.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
shimLib = "libsherpa-shim.dylib"
|
||||
} else {
|
||||
shimLib = "libsherpa-shim.so"
|
||||
}
|
||||
}
|
||||
capiLib := os.Getenv("SHERPA_ONNX_LIBRARY")
|
||||
if capiLib == "" {
|
||||
capiLib = "libsherpa-onnx-c-api.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
capiLib = "libsherpa-onnx-c-api.dylib"
|
||||
} else {
|
||||
capiLib = "libsherpa-onnx-c-api.so"
|
||||
}
|
||||
}
|
||||
|
||||
shim, err := purego.Dlopen(shimLib, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -3,7 +3,13 @@ set -ex
|
||||
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
export SHERPA_SHIM_LIBRARY=$CURDIR/lib/libsherpa-shim.dylib
|
||||
export SHERPA_ONNX_LIBRARY=$CURDIR/lib/libsherpa-onnx-c-api.dylib
|
||||
else
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=f440ad9c29dd8bc34e5d1f4b863832b96d6ea05f
|
||||
STABLEDIFFUSION_GGML_VERSION?=8caa3f908ae6d4a4bef531e73b9a969f266a3d1f
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
@@ -131,6 +131,7 @@ libgosd-custom: CMakeLists.txt cpp/gosd.cpp cpp/gosd.h
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libgosd.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/libgosd.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libgosd.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
all: stablediffusion-ggml package
|
||||
@@ -3,6 +3,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -21,7 +22,11 @@ func main() {
|
||||
// Get library name from environment variable, default to fallback
|
||||
libName := os.Getenv("SD_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libgosd-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libgosd-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libgosd-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -12,6 +12,7 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/libgosd-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libgosd-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -avf $CURDIR/stablediffusion-ggml $CURDIR/package/
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
|
||||
@@ -12,9 +12,18 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/libgosd-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single library variant (Metal or Accelerate). The gosd target is
|
||||
# built as a CMake MODULE, which emits a .dylib for a SHARED build but a
|
||||
# .so for a MODULE build on Apple, so prefer .dylib and fall back to .so.
|
||||
LIBRARY="$CURDIR/libgosd-fallback.dylib"
|
||||
if [ ! -e "$LIBRARY" ]; then
|
||||
LIBRARY="$CURDIR/libgosd-fallback.so"
|
||||
fi
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/libgosd-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/libgosd-avx.so ]; then
|
||||
@@ -36,9 +45,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
LIBRARY="$CURDIR/libgosd-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export SD_LIBRARY=$LIBRARY
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode"
|
||||
@@ -943,7 +944,13 @@ func InitializeONNXRuntime() error {
|
||||
}
|
||||
}
|
||||
if libPath == "" {
|
||||
libPath = "/usr/local/lib/libonnxruntime.so"
|
||||
// LocalAI: default to the platform-native shared library
|
||||
// extension when nothing else is found (dyld vs ld.so).
|
||||
if runtime.GOOS == "darwin" {
|
||||
libPath = "/usr/local/lib/libonnxruntime.dylib"
|
||||
} else {
|
||||
libPath = "/usr/local/lib/libonnxruntime.so"
|
||||
}
|
||||
}
|
||||
}
|
||||
ort.SetSharedLibraryPath(libPath)
|
||||
|
||||
@@ -32,6 +32,10 @@ elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
|
||||
cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
|
||||
cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
|
||||
cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
|
||||
elif [ $(uname -s) = "Darwin" ]; then
|
||||
# macOS: dyld resolves the bundled .dylib via DYLD_LIBRARY_PATH (set in
|
||||
# run.sh); there is no ld.so loader nor glibc to bundle.
|
||||
echo "Detected Darwin"
|
||||
else
|
||||
echo "Error: Could not detect architecture"
|
||||
exit 1
|
||||
|
||||
@@ -3,12 +3,19 @@ set -ex
|
||||
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS uses dyld: there is no ld.so loader, and the search path env
|
||||
# var is DYLD_LIBRARY_PATH. ONNX Runtime ships as a .dylib here.
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.dylib
|
||||
else
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export ONNXRUNTIME_LIB_PATH=$CURDIR/lib/libonnxruntime.so
|
||||
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@"
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
echo "Using lib/ld.so"
|
||||
exec $CURDIR/lib/ld.so $CURDIR/supertonic "$@"
|
||||
fi
|
||||
fi
|
||||
|
||||
exec $CURDIR/supertonic "$@"
|
||||
|
||||
@@ -70,8 +70,8 @@ UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = libgovibevoicecpp-avx.so libgovibevoicecpp-avx2.so libgovibevoicecpp-avx512.so libgovibevoicecpp-fallback.so
|
||||
else
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||
VARIANT_TARGETS = libgovibevoicecpp-fallback.so
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant (as a dylib)
|
||||
VARIANT_TARGETS = libgovibevoicecpp-fallback.dylib
|
||||
endif
|
||||
|
||||
vibevoice-cpp: main.go govibevoicecpp.go $(VARIANT_TARGETS)
|
||||
@@ -83,7 +83,7 @@ package: vibevoice-cpp
|
||||
build: package
|
||||
|
||||
clean: purge
|
||||
rm -rf libgovibevoicecpp*.so package sources/vibevoice.cpp vibevoice-cpp
|
||||
rm -rf libgovibevoicecpp*.so libgovibevoicecpp*.dylib package sources/vibevoice.cpp vibevoice-cpp
|
||||
|
||||
purge:
|
||||
rm -rf build*
|
||||
@@ -119,13 +119,21 @@ libgovibevoicecpp-fallback.so: sources/vibevoice.cpp
|
||||
SO_TARGET=libgovibevoicecpp-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgovibevoicecpp-custom
|
||||
rm -rfv build*
|
||||
|
||||
# Build fallback variant as a dylib (Darwin)
|
||||
libgovibevoicecpp-fallback.dylib: sources/vibevoice.cpp
|
||||
$(MAKE) purge
|
||||
$(info ${GREEN}I vibevoice-cpp build info:fallback (dylib)${RESET})
|
||||
SO_TARGET=libgovibevoicecpp-fallback.dylib CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgovibevoicecpp-custom
|
||||
rm -rfv build*
|
||||
|
||||
libgovibevoicecpp-custom: CMakeLists.txt cpp/govibevoicecpp.cpp cpp/govibevoicecpp.h
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
cd build-$(SO_TARGET) && \
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) --target govibevoicecpp && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libgovibevoicecpp.so ./$(SO_TARGET)
|
||||
(mv build-$(SO_TARGET)/libgovibevoicecpp.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libgovibevoicecpp.dylib ./$(SO_TARGET) 2>/dev/null)
|
||||
|
||||
test: vibevoice-cpp
|
||||
@echo "Running vibevoice-cpp tests..."
|
||||
|
||||
@@ -4,6 +4,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -21,7 +22,11 @@ type LibFuncs struct {
|
||||
func main() {
|
||||
libName := os.Getenv("VIBEVOICECPP_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libgovibevoicecpp-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libgovibevoicecpp-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libgovibevoicecpp-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
lib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/vibevoice-cpp $CURDIR/package/
|
||||
cp -fv $CURDIR/libgovibevoicecpp-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libgovibevoicecpp-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/libgovibevoicecpp-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
|
||||
@@ -11,9 +11,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/libgovibevoicecpp-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single dylib variant (Metal or Accelerate)
|
||||
LIBRARY="$CURDIR/libgovibevoicecpp-fallback.dylib"
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/libgovibevoicecpp-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/libgovibevoicecpp-avx.so ]; then
|
||||
@@ -34,9 +38,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
LIBRARY="$CURDIR/libgovibevoicecpp-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export VIBEVOICECPP_LIBRARY=$LIBRARY
|
||||
|
||||
if [ -f $CURDIR/lib/ld.so ]; then
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=bae6bc02b1940bbfb87b6a0299c565e563b916d1
|
||||
WHISPER_CPP_VERSION?=43d78af5be58f41d6ffbc227d608f104577741ea
|
||||
SO_TARGET?=libgowhisper.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
@@ -117,6 +117,7 @@ libgowhisper-custom: CMakeLists.txt cpp/gowhisper.cpp cpp/gowhisper.h
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libgowhisper.so ./$(SO_TARGET)
|
||||
mv build-$(SO_TARGET)/libgowhisper.so ./$(SO_TARGET) 2>/dev/null || \
|
||||
mv build-$(SO_TARGET)/libgowhisper.dylib ./$(SO_TARGET:.so=.dylib)
|
||||
|
||||
all: whisper package
|
||||
|
||||
@@ -4,6 +4,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
@@ -22,7 +23,11 @@ func main() {
|
||||
// Get library name from environment variable, default to fallback
|
||||
libName := os.Getenv("WHISPER_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libgowhisper-fallback.so"
|
||||
if runtime.GOOS == "darwin" {
|
||||
libName = "./libgowhisper-fallback.dylib"
|
||||
} else {
|
||||
libName = "./libgowhisper-fallback.so"
|
||||
}
|
||||
}
|
||||
|
||||
gosd, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
|
||||
@@ -12,7 +12,8 @@ REPO_ROOT="${CURDIR}/../../.."
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avf $CURDIR/whisper $CURDIR/package/
|
||||
cp -fv $CURDIR/libgowhisper-*.so $CURDIR/package/
|
||||
cp -fv $CURDIR/libgowhisper-*.so $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/libgowhisper-*.dylib $CURDIR/package/ 2>/dev/null || true
|
||||
cp -fv $CURDIR/run.sh $CURDIR/package/
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
|
||||
@@ -12,9 +12,13 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
grep -e "flags" /proc/cpuinfo | head -1
|
||||
fi
|
||||
|
||||
LIBRARY="$CURDIR/libgowhisper-fallback.so"
|
||||
if [ "$(uname)" = "Darwin" ]; then
|
||||
# macOS: single dylib variant (Metal or Accelerate)
|
||||
LIBRARY="$CURDIR/libgowhisper-fallback.dylib"
|
||||
export DYLD_LIBRARY_PATH=$CURDIR/lib:$DYLD_LIBRARY_PATH
|
||||
else
|
||||
LIBRARY="$CURDIR/libgowhisper-fallback.so"
|
||||
|
||||
if [ "$(uname)" != "Darwin" ]; then
|
||||
if grep -q -e "\savx\s" /proc/cpuinfo ; then
|
||||
echo "CPU: AVX found OK"
|
||||
if [ -e $CURDIR/libgowhisper-avx.so ]; then
|
||||
@@ -36,9 +40,10 @@ if [ "$(uname)" != "Darwin" ]; then
|
||||
LIBRARY="$CURDIR/libgowhisper-avx512.so"
|
||||
fi
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
fi
|
||||
|
||||
export LD_LIBRARY_PATH=$CURDIR/lib:$LD_LIBRARY_PATH
|
||||
export WHISPER_LIBRARY=$LIBRARY
|
||||
|
||||
# If there is a lib/ld.so, use it
|
||||
|
||||
@@ -340,6 +340,7 @@
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-sam3-cpp"
|
||||
intel: "intel-sycl-f32-sam3-cpp"
|
||||
vulkan: "vulkan-sam3-cpp"
|
||||
metal: "metal-sam3-cpp"
|
||||
- &rfdetrcpp
|
||||
name: "rfdetr-cpp"
|
||||
alias: "rfdetr-cpp"
|
||||
@@ -368,6 +369,7 @@
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-rfdetr-cpp"
|
||||
intel: "intel-sycl-f32-rfdetr-cpp"
|
||||
vulkan: "vulkan-rfdetr-cpp"
|
||||
metal: "metal-rfdetr-cpp"
|
||||
- &locateanything
|
||||
name: "locate-anything"
|
||||
alias: "locate-anything"
|
||||
@@ -397,6 +399,7 @@
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-locate-anything-cpp"
|
||||
intel: "intel-sycl-f32-locate-anything-cpp"
|
||||
vulkan: "vulkan-locate-anything-cpp"
|
||||
metal: "metal-locate-anything-cpp"
|
||||
- !!merge <<: *locateanything
|
||||
name: "locate-anything-development"
|
||||
capabilities:
|
||||
@@ -409,6 +412,7 @@
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-locate-anything-cpp-development"
|
||||
intel: "intel-sycl-f32-locate-anything-cpp-development"
|
||||
vulkan: "vulkan-locate-anything-cpp-development"
|
||||
metal: "metal-locate-anything-cpp-development"
|
||||
- !!merge <<: *locateanything
|
||||
name: "cpu-locate-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-locate-anything-cpp"
|
||||
@@ -419,6 +423,16 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-locate-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-locate-anything-cpp
|
||||
- !!merge <<: *locateanything
|
||||
name: "metal-locate-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-locate-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-metal-darwin-arm64-locate-anything-cpp
|
||||
- !!merge <<: *locateanything
|
||||
name: "metal-locate-anything-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-locate-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-locate-anything-cpp
|
||||
- !!merge <<: *locateanything
|
||||
name: "cuda12-locate-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-locate-anything-cpp"
|
||||
@@ -517,6 +531,7 @@
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-depth-anything-cpp"
|
||||
intel: "intel-sycl-f32-depth-anything-cpp"
|
||||
vulkan: "vulkan-depth-anything-cpp"
|
||||
metal: "metal-depth-anything-cpp"
|
||||
- !!merge <<: *depthanything
|
||||
name: "depth-anything-development"
|
||||
capabilities:
|
||||
@@ -529,6 +544,7 @@
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-depth-anything-cpp-development"
|
||||
intel: "intel-sycl-f32-depth-anything-cpp-development"
|
||||
vulkan: "vulkan-depth-anything-cpp-development"
|
||||
metal: "metal-depth-anything-cpp-development"
|
||||
- !!merge <<: *depthanything
|
||||
name: "cpu-depth-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-depth-anything-cpp"
|
||||
@@ -539,6 +555,16 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "metal-depth-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-metal-darwin-arm64-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "metal-depth-anything-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-depth-anything-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-depth-anything-cpp
|
||||
- !!merge <<: *depthanything
|
||||
name: "cuda12-depth-anything-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-depth-anything-cpp"
|
||||
@@ -645,6 +671,7 @@
|
||||
nvidia-cuda-13: "cuda13-vllm"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-vllm"
|
||||
cpu: "cpu-vllm"
|
||||
metal: "metal-vllm"
|
||||
- &sglang
|
||||
name: "sglang"
|
||||
license: apache-2.0
|
||||
@@ -1030,6 +1057,8 @@
|
||||
nvidia-l4t: "vulkan-localvqe"
|
||||
nvidia-l4t-cuda-12: "vulkan-localvqe"
|
||||
nvidia-l4t-cuda-13: "vulkan-localvqe"
|
||||
# Apple Silicon: CPU build (LocalVQE has no Metal path); still arm64-native.
|
||||
metal: "metal-localvqe"
|
||||
- &privacyfilter
|
||||
name: "privacy-filter"
|
||||
alias: "privacy-filter"
|
||||
@@ -1066,6 +1095,7 @@
|
||||
amd: "vulkan-privacy-filter"
|
||||
intel: "vulkan-privacy-filter"
|
||||
vulkan: "vulkan-privacy-filter"
|
||||
metal: "metal-privacy-filter"
|
||||
- &faster-whisper
|
||||
icon: https://avatars.githubusercontent.com/u/1520500?s=200&v=4
|
||||
description: |
|
||||
@@ -1284,6 +1314,7 @@
|
||||
nvidia-cuda-13: "cuda13-liquid-audio"
|
||||
nvidia-cuda-12: "cuda12-liquid-audio"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio"
|
||||
metal: "metal-liquid-audio"
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/7_6D7rWrLxp2hb6OHSV1p.png
|
||||
- &qwen-tts
|
||||
urls:
|
||||
@@ -1569,6 +1600,7 @@
|
||||
- TTS
|
||||
capabilities:
|
||||
default: "cpu-supertonic"
|
||||
metal: "metal-supertonic"
|
||||
- !!merge <<: *neutts
|
||||
name: "neutts-development"
|
||||
capabilities:
|
||||
@@ -2906,6 +2938,16 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-privacy-filter"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-vulkan-privacy-filter
|
||||
- !!merge <<: *privacyfilter
|
||||
name: "metal-privacy-filter"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-privacy-filter"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-metal-darwin-arm64-privacy-filter
|
||||
- !!merge <<: *privacyfilter
|
||||
name: "metal-privacy-filter-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-privacy-filter"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-privacy-filter
|
||||
- !!merge <<: *privacyfilter
|
||||
name: "cuda13-privacy-filter"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-privacy-filter"
|
||||
@@ -2927,6 +2969,17 @@
|
||||
nvidia-cuda-13: "cuda13-vllm-development"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-vllm-development"
|
||||
cpu: "cpu-vllm-development"
|
||||
metal: "metal-vllm-development"
|
||||
- !!merge <<: *vllm
|
||||
name: "metal-vllm"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-vllm"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-metal-darwin-arm64-vllm
|
||||
- !!merge <<: *vllm
|
||||
name: "metal-vllm-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-vllm"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-vllm
|
||||
- !!merge <<: *vllm
|
||||
name: "cuda12-vllm"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm"
|
||||
@@ -3206,6 +3259,7 @@
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-sam3-cpp-development"
|
||||
intel: "intel-sycl-f32-sam3-cpp-development"
|
||||
vulkan: "vulkan-sam3-cpp-development"
|
||||
metal: "metal-sam3-cpp-development"
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "cpu-sam3-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-sam3-cpp"
|
||||
@@ -3216,6 +3270,16 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "metal-sam3-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-metal-darwin-arm64-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "metal-sam3-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "cuda12-sam3-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-sam3-cpp"
|
||||
@@ -3289,6 +3353,7 @@
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-rfdetr-cpp-development"
|
||||
intel: "intel-sycl-f32-rfdetr-cpp-development"
|
||||
vulkan: "vulkan-rfdetr-cpp-development"
|
||||
metal: "metal-rfdetr-cpp-development"
|
||||
- !!merge <<: *rfdetrcpp
|
||||
name: "cpu-rfdetr-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-rfdetr-cpp"
|
||||
@@ -3299,6 +3364,16 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-rfdetr-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-rfdetr-cpp
|
||||
- !!merge <<: *rfdetrcpp
|
||||
name: "metal-rfdetr-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-rfdetr-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-metal-darwin-arm64-rfdetr-cpp
|
||||
- !!merge <<: *rfdetrcpp
|
||||
name: "metal-rfdetr-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-rfdetr-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-rfdetr-cpp
|
||||
- !!merge <<: *rfdetrcpp
|
||||
name: "cuda12-rfdetr-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rfdetr-cpp"
|
||||
@@ -4087,6 +4162,16 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-localvqe"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-vulkan-localvqe
|
||||
- !!merge <<: *localvqecpp
|
||||
name: "metal-localvqe"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-localvqe"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-metal-darwin-arm64-localvqe
|
||||
- !!merge <<: *localvqecpp
|
||||
name: "metal-localvqe-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-localvqe"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-localvqe
|
||||
## kokoro
|
||||
- !!merge <<: *kokoro
|
||||
name: "kokoro-development"
|
||||
@@ -4612,6 +4697,7 @@
|
||||
nvidia-cuda-13: "cuda13-liquid-audio-development"
|
||||
nvidia-cuda-12: "cuda12-liquid-audio-development"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-liquid-audio-development"
|
||||
metal: "metal-liquid-audio-development"
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "cpu-liquid-audio"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-liquid-audio"
|
||||
@@ -4622,6 +4708,16 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "metal-liquid-audio"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-metal-darwin-arm64-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "metal-liquid-audio-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-liquid-audio"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-liquid-audio
|
||||
- !!merge <<: *liquid-audio
|
||||
name: "cuda12-liquid-audio"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-liquid-audio"
|
||||
@@ -5282,6 +5378,7 @@
|
||||
nvidia: "cuda12-trl"
|
||||
nvidia-cuda-12: "cuda12-trl"
|
||||
nvidia-cuda-13: "cuda13-trl"
|
||||
metal: "metal-trl"
|
||||
## TRL backend images
|
||||
- !!merge <<: *trl
|
||||
name: "cpu-trl"
|
||||
@@ -5313,6 +5410,16 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-trl"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-trl
|
||||
- !!merge <<: *trl
|
||||
name: "metal-trl"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-trl"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-metal-darwin-arm64-trl
|
||||
- !!merge <<: *trl
|
||||
name: "metal-trl-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-trl"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-trl
|
||||
## llama.cpp quantization backend
|
||||
- &llama-cpp-quantization
|
||||
name: "llama-cpp-quantization"
|
||||
@@ -5484,6 +5591,7 @@
|
||||
name: "supertonic-development"
|
||||
capabilities:
|
||||
default: "cpu-supertonic-development"
|
||||
metal: "metal-supertonic-development"
|
||||
- !!merge <<: *supertonic
|
||||
name: "cpu-supertonic"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-supertonic"
|
||||
@@ -5494,3 +5602,13 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-supertonic"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-supertonic
|
||||
- !!merge <<: *supertonic
|
||||
name: "metal-supertonic"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-supertonic"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-metal-darwin-arm64-supertonic
|
||||
- !!merge <<: *supertonic
|
||||
name: "metal-supertonic-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-supertonic"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-supertonic
|
||||
|
||||
@@ -14,5 +14,11 @@ else
|
||||
fi
|
||||
|
||||
# liquid-audio's torch wheels are large; allow upgrades to satisfy transitive pins
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade"
|
||||
# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
|
||||
# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
|
||||
# it on the uv path; Linux/CUDA resolution is unchanged.
|
||||
if [ "x${USE_PIP:-}" != "xtrue" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
installRequirements
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# MPS (Apple Silicon / Metal) build profile - installed by the darwin CI job.
|
||||
torch>=2.8.0
|
||||
torchaudio>=2.8.0
|
||||
torchcodec>=0.9.1
|
||||
|
||||
@@ -8,7 +8,13 @@ else
|
||||
source $backend_dir/../common/libbackend.sh
|
||||
fi
|
||||
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade"
|
||||
# --index-strategy is a uv-only flag. The darwin/MPS build installs with pip
|
||||
# (USE_PIP=true in scripts/build/python-darwin.sh), which rejects it. Only add
|
||||
# it when uv is the installer, keeping the Linux/CUDA resolution unchanged.
|
||||
if [ "x${USE_PIP:-}" != "xtrue" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
installRequirements
|
||||
|
||||
# Fetch convert_hf_to_gguf.py and gguf package from the same llama.cpp version
|
||||
|
||||
12
backend/python/trl/requirements-mps.txt
Normal file
12
backend/python/trl/requirements-mps.txt
Normal file
@@ -0,0 +1,12 @@
|
||||
torch==2.10.0
|
||||
trl
|
||||
peft
|
||||
datasets>=3.0.0
|
||||
transformers>=4.56.2
|
||||
accelerate>=1.4.0
|
||||
huggingface-hub>=1.3.0
|
||||
sentencepiece
|
||||
# Note: bitsandbytes is intentionally omitted on MPS. It is only used by the
|
||||
# CUDA (cublas) variants for 8-bit/4-bit quantization and has poor support on
|
||||
# Apple Silicon. torch here uses the plain PyPI wheels, which ship MPS support
|
||||
# on macOS arm64.
|
||||
@@ -457,9 +457,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if last_output is None or not getattr(last_output, "prompt_logprobs", None):
|
||||
context.set_code(grpc.StatusCode.INTERNAL)
|
||||
context.set_details("vLLM did not return prompt_logprobs")
|
||||
_pl = getattr(last_output, "prompt_logprobs", None) if last_output is not None else None
|
||||
# Some engines accept the prompt_logprobs request but return a
|
||||
# list of all-None entries instead of computing them (observed
|
||||
# with vllm-metal's MLX backend on macOS). Treat that as
|
||||
# unsupported rather than silently scoring every candidate as 0.
|
||||
if not _pl or all(e is None for e in _pl):
|
||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||
context.set_details("This backend did not return prompt_logprobs; scoring is unsupported on this engine (e.g. vllm-metal / MLX on macOS).")
|
||||
return backend_pb2.ScoreResponse()
|
||||
|
||||
prompt_logprobs = last_output.prompt_logprobs
|
||||
|
||||
@@ -43,6 +43,24 @@ if [ "x${BUILD_PROFILE}" == "xcublas13" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-best-match"
|
||||
fi
|
||||
|
||||
# Apple Silicon (Metal/MLX) via vllm-metal.
|
||||
# vllm-metal (github.com/vllm-project/vllm-metal) brings vLLM to macOS on Apple
|
||||
# Silicon: it registers through vLLM's platform-plugin entry point
|
||||
# (metal -> vllm_metal:register), MetalPlatform activates, and the vLLM v1
|
||||
# AsyncLLM engine runs on the GPU through MLX. LocalAI's backend.py is UNCHANGED
|
||||
# on darwin — AsyncEngineArgs(...) -> AsyncLLMEngine.from_engine_args transparently
|
||||
# resolves to the MLX engine (proven on a real M4 / macOS 26.5 against Qwen3-0.6B).
|
||||
#
|
||||
# vllm-metal REQUIRES Python 3.12, so force the portable CPython before the venv
|
||||
# is created (ensureVenv reads PYTHON_VERSION/PYTHON_PATCH/PY_STANDALONE_TAG).
|
||||
# The patch + standalone tag mirror the l4t13 cp312 pin — a known-good
|
||||
# python-build-standalone release that also ships an aarch64-apple-darwin asset.
|
||||
if [ "$(uname -s)" = "Darwin" ]; then
|
||||
PYTHON_VERSION="3.12"
|
||||
PYTHON_PATCH="12"
|
||||
PY_STANDALONE_TAG="20251120"
|
||||
fi
|
||||
|
||||
# JetPack 7 / L4T arm64 vllm + torch wheels come straight from PyPI now
|
||||
# (torch 2.11+ ships aarch64 + cu130 manylinux wheels and vllm 0.20+ ships
|
||||
# an aarch64 wheel pinned to that torch). They're cp312-only, so bump the
|
||||
@@ -57,11 +75,87 @@ if [ "x${BUILD_PROFILE}" == "xl4t13" ]; then
|
||||
PY_STANDALONE_TAG="20251120"
|
||||
fi
|
||||
|
||||
# ===================== Apple Silicon (Metal/MLX) =====================
|
||||
# Reproduce vllm-metal's upstream installer
|
||||
# (curl -fsSL https://raw.githubusercontent.com/vllm-project/vllm-metal/main/install.sh)
|
||||
# but INTO LocalAI's managed venv (ensureVenv) instead of a throwaway
|
||||
# ~/.venv-vllm-metal, so the backend integrates with LocalAI's venv lifecycle
|
||||
# (portable CPython, _makeVenvPortable relocation, runtime activation). The
|
||||
# normal CUDA/CPU installRequirements is skipped on darwin — there is no
|
||||
# macOS/arm64 vLLM wheel on PyPI; vLLM is built from source and the MLX engine
|
||||
# is layered on by the vllm-metal wheel.
|
||||
if [ "$(uname -s)" = "Darwin" ]; then
|
||||
# Create/activate the portable 3.12 venv. On darwin USE_PIP=true and
|
||||
# PORTABLE_PYTHON=true (set by scripts/build/python-darwin.sh), so this is a
|
||||
# `python -m venv` based, relocatable venv.
|
||||
ensureVenv
|
||||
|
||||
# vllm-metal's installer drives everything through `uv`: building vLLM from
|
||||
# the CPU requirements needs `--index-strategy unsafe-best-match` (mixes the
|
||||
# pytorch CPU channel with PyPI), a flag plain pip does not have. The darwin
|
||||
# venv is pip-based, so bootstrap uv into it. uv honours $VIRTUAL_ENV (set by
|
||||
# libbackend's _activateVenv) and installs into THIS venv — same pattern the
|
||||
# intel branch below relies on.
|
||||
pip install uv
|
||||
|
||||
# The ONLY darwin version pin -- AUTO-BUMPED by .github/bump_vllm_metal.sh,
|
||||
# which tracks vllm-project/vllm-metal releases (NOT vllm/vllm latest). Keep
|
||||
# it as a plain double-quoted assignment on its own line so the bumper's sed
|
||||
# can rewrite it. Darwin therefore follows vllm-metal and can lag the Linux
|
||||
# vllm pin (requirements-cublas13-after.txt, bumped independently against
|
||||
# vllm/vllm) until vllm-metal supports a newer vLLM.
|
||||
VLLM_METAL_VERSION="v0.3.0.dev20260622062346"
|
||||
|
||||
# The coupled vLLM source version is whatever this vllm-metal release builds
|
||||
# against -- it declares it in its own installer as `vllm_v=`. Derive it from
|
||||
# the PINNED tag rather than hardcoding a second value that could drift. The
|
||||
# tag is immutable, so this stays reproducible across rebuilds.
|
||||
VLLM_VERSION=$(curl -fsSL "https://raw.githubusercontent.com/vllm-project/vllm-metal/${VLLM_METAL_VERSION}/install.sh" \
|
||||
| grep -oE 'vllm_v="[0-9]+\.[0-9]+\.[0-9]+"' | head -n1 | cut -d'"' -f2)
|
||||
if [ -z "${VLLM_VERSION}" ]; then
|
||||
echo "ERROR: could not derive the vLLM version from vllm-metal ${VLLM_METAL_VERSION}" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "vllm-metal ${VLLM_METAL_VERSION} builds against vLLM ${VLLM_VERSION}"
|
||||
|
||||
_vllm_src=$(mktemp -d)
|
||||
trap 'rm -rf "${_vllm_src}"' EXIT
|
||||
pushd "${_vllm_src}"
|
||||
# 1) Build vLLM ${VLLM_VERSION} from the release source tarball against
|
||||
# the CPU requirements. vllm-metal layers its MLX platform plugin on
|
||||
# top of this exact build.
|
||||
curl -fsSL -o "vllm-${VLLM_VERSION}.tar.gz" \
|
||||
"https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}.tar.gz"
|
||||
tar -xzf "vllm-${VLLM_VERSION}.tar.gz"
|
||||
pushd "vllm-${VLLM_VERSION}"
|
||||
uv pip install -r requirements/cpu.txt --index-strategy unsafe-best-match
|
||||
# -Wno-parentheses: clang on macOS treats one of vLLM's C++ warnings
|
||||
# as an error without it (matches the upstream installer's CXXFLAGS).
|
||||
CXXFLAGS="-Wno-parentheses" uv pip install .
|
||||
popd
|
||||
popd
|
||||
|
||||
# 2) Install the prebuilt vllm-metal wheel for the PINNED release. It pulls
|
||||
# mlx / mlx-metal as deps and registers the `metal` platform plugin that
|
||||
# backend.py resolves to at engine-init time. Build the release-asset URL
|
||||
# deterministically (tag + the cp312/arm64 wheel name) rather than querying
|
||||
# api.github.com, whose unauthenticated rate limit (60/hr per IP) 403s on
|
||||
# shared CI runners. The wheel version is the tag without its leading 'v'.
|
||||
_metal_wheel="vllm_metal-${VLLM_METAL_VERSION#v}-cp312-cp312-macosx_11_0_arm64.whl"
|
||||
_metal_wheel_url="https://github.com/vllm-project/vllm-metal/releases/download/${VLLM_METAL_VERSION}/${_metal_wheel}"
|
||||
echo "Installing vllm-metal wheel: ${_metal_wheel_url}"
|
||||
uv pip install "${_metal_wheel_url}"
|
||||
|
||||
# Generate the gRPC stubs (backend_pb2*). installRequirements normally does
|
||||
# this via runProtogen at the end; we skipped installRequirements on darwin,
|
||||
# so call it explicitly here.
|
||||
runProtogen
|
||||
|
||||
# Intel XPU has no upstream-published vllm wheels, so we always build vllm
|
||||
# from source against torch-xpu and replace the default triton with
|
||||
# triton-xpu (matching torch 2.11). Mirrors the upstream procedure:
|
||||
# https://github.com/vllm-project/vllm/blob/main/docs/getting_started/installation/gpu.xpu.inc.md
|
||||
if [ "x${BUILD_TYPE}" == "xintel" ]; then
|
||||
elif [ "x${BUILD_TYPE}" == "xintel" ]; then
|
||||
# Hide requirements-intel-after.txt so installRequirements doesn't
|
||||
# try `pip install vllm` (would either fail or grab a non-XPU wheel).
|
||||
_intel_after="${backend_dir}/requirements-intel-after.txt"
|
||||
|
||||
@@ -4,4 +4,7 @@
|
||||
# instead — the cublas13 case in install.sh adds --index-strategy=unsafe-best-match
|
||||
# so uv consults this index alongside PyPI.
|
||||
--extra-index-url https://wheels.vllm.ai/0.23.0/cu130
|
||||
# VERSION COUPLING: darwin/Apple-Silicon builds use vllm-metal (see install.sh),
|
||||
# which pins this exact vLLM version. Bumping vllm here means coordinating with a
|
||||
# vllm-metal release that supports the new version, or macOS/Metal builds break.
|
||||
vllm==0.23.0
|
||||
|
||||
@@ -215,6 +215,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
envBackendGalleries := slices.Equal(appConfig.BackendGalleries, startupAppConfig.BackendGalleries)
|
||||
envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries
|
||||
envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries
|
||||
envPIIDefaultDetectors := slices.Equal(appConfig.PIIDefaultDetectors, startupAppConfig.PIIDefaultDetectors)
|
||||
envAgentJobRetentionDays := appConfig.AgentJobRetentionDays == startupAppConfig.AgentJobRetentionDays
|
||||
envForceEvictionWhenBusy := appConfig.ForceEvictionWhenBusy == startupAppConfig.ForceEvictionWhenBusy
|
||||
envLRUEvictionMaxRetries := appConfig.LRUEvictionMaxRetries == startupAppConfig.LRUEvictionMaxRetries
|
||||
@@ -335,6 +336,15 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand
|
||||
if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries {
|
||||
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
|
||||
}
|
||||
if settings.PIIDefaultDetectors != nil && !envPIIDefaultDetectors {
|
||||
// Request-side default redaction reads this live via
|
||||
// ResolvePIIPolicy, so a file edit takes effect on the next chat
|
||||
// request. The MITM listener resolves its per-host detector map
|
||||
// once at start, so a raw file edit reaches cloud-proxy traffic
|
||||
// only after a restart or a POST /api/settings (which rebuilds
|
||||
// the listener) — the admin UI uses the latter.
|
||||
appConfig.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
|
||||
}
|
||||
if settings.AutoUpgradeBackends != nil {
|
||||
appConfig.AutoUpgradeBackends = *settings.AutoUpgradeBackends
|
||||
}
|
||||
|
||||
@@ -109,6 +109,52 @@ var _ = Describe("loadRuntimeSettingsFromFile", func() {
|
||||
})
|
||||
})
|
||||
|
||||
// Instance-wide default PII detectors. The file is the only source (no
|
||||
// env var), and the loader runs immediately before startMITMIfConfigured,
|
||||
// so a regression here means the cloud-proxy MITM listener resolves an
|
||||
// empty detector set at boot and forwards intercepted traffic unredacted —
|
||||
// even though pii_default_detectors is on disk and the MITM model has PII
|
||||
// enabled. It also breaks request-side default redaction the same way.
|
||||
Describe("PII default detectors", func() {
|
||||
It("loads pii_default_detectors from the file", func() {
|
||||
cfg := &config.ApplicationConfig{DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["privacy-filter-nemotron", "secret-filter"]}`)}
|
||||
loadRuntimeSettingsFromFile(cfg)
|
||||
Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"privacy-filter-nemotron", "secret-filter"}))
|
||||
})
|
||||
|
||||
It("does not override an env/CLI-set value (LOCALAI_PII_DEFAULT_DETECTORS)", func() {
|
||||
cfg := &config.ApplicationConfig{
|
||||
DynamicConfigsDir: seedSettings(`{"pii_default_detectors": ["from-file"]}`),
|
||||
PIIDefaultDetectors: []string{"from-env"}, // simulate WithPIIDefaultDetectors(env)
|
||||
}
|
||||
loadRuntimeSettingsFromFile(cfg)
|
||||
Expect(cfg.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env var must win over the persisted file value")
|
||||
})
|
||||
})
|
||||
|
||||
// The live file watcher applies pii_default_detectors on a runtime change
|
||||
// the same way it handles galleries/threads/etc.: env-set values (current
|
||||
// == startup snapshot) are left alone, otherwise the file value is applied
|
||||
// to the live config so request-side default redaction picks it up without
|
||||
// a restart.
|
||||
Describe("file watcher: pii_default_detectors", func() {
|
||||
It("applies a changed file value to the live config", func() {
|
||||
startup := config.ApplicationConfig{} // no env baseline
|
||||
live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"old"}}
|
||||
handler := readRuntimeSettingsJson(startup)
|
||||
Expect(handler([]byte(`{"pii_default_detectors":["new-a","new-b"]}`), live)).To(Succeed())
|
||||
Expect(live.PIIDefaultDetectors).To(Equal([]string{"new-a", "new-b"}))
|
||||
})
|
||||
|
||||
It("leaves an env-controlled value untouched", func() {
|
||||
startup := config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
|
||||
live := &config.ApplicationConfig{PIIDefaultDetectors: []string{"from-env"}}
|
||||
handler := readRuntimeSettingsJson(startup)
|
||||
Expect(handler([]byte(`{"pii_default_detectors":["from-file"]}`), live)).To(Succeed())
|
||||
Expect(live.PIIDefaultDetectors).To(Equal([]string{"from-env"}), "env-controlled detectors must not be overwritten by the file")
|
||||
})
|
||||
})
|
||||
|
||||
// The Agent Pool block has a mix of zero and non-zero defaults
|
||||
// (Enabled=true, EmbeddingModel="granite-...", MaxChunkingSize=400,
|
||||
// VectorEngine="chromem", AgentHubURL="https://agenthub.localai.io").
|
||||
|
||||
@@ -750,6 +750,20 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
|
||||
options.MITMListen = *settings.MITMListen
|
||||
}
|
||||
|
||||
// Instance-wide default PII detectors. LOCALAI_PII_DEFAULT_DETECTORS (via
|
||||
// WithPIIDefaultDetectors) wins when set; otherwise the file is the source
|
||||
// — apply it only when the env/CLI left the value empty, mirroring the
|
||||
// "env > file" precedence used for the other fields. This must land before
|
||||
// startMITMIfConfigured (called right after this loader): the cloud-proxy
|
||||
// listener resolves each intercept host's detectors once at start via
|
||||
// ResolvePIIPolicy, and a MITM model that names no detectors of its own
|
||||
// falls back to these defaults. Without it the listener (and request-side
|
||||
// default redaction) starts with an empty detector set and forwards
|
||||
// traffic unredacted even though pii_default_detectors is on disk.
|
||||
if settings.PIIDefaultDetectors != nil && len(options.PIIDefaultDetectors) == 0 {
|
||||
options.PIIDefaultDetectors = append([]string(nil), (*settings.PIIDefaultDetectors)...)
|
||||
}
|
||||
|
||||
// Backend upgrade flags
|
||||
if settings.AutoUpgradeBackends != nil {
|
||||
if !options.AutoUpgradeBackends {
|
||||
|
||||
@@ -140,7 +140,7 @@ type RunCMD struct {
|
||||
OIDCIssuer string `env:"LOCALAI_OIDC_ISSUER" help:"OIDC issuer URL for auto-discovery" group:"auth"`
|
||||
OIDCClientID string `env:"LOCALAI_OIDC_CLIENT_ID" help:"OIDC Client ID (auto-enables auth)" group:"auth"`
|
||||
OIDCClientSecret string `env:"LOCALAI_OIDC_CLIENT_SECRET" help:"OIDC Client Secret" group:"auth"`
|
||||
AuthBaseURL string `env:"LOCALAI_BASE_URL" help:"Base URL for OAuth callbacks (e.g. http://localhost:8080)" group:"auth"`
|
||||
ExternalBaseURL string `env:"LOCALAI_BASE_URL" help:"External base URL of this instance (e.g. https://localhost:8080). Used for OAuth callbacks and self-referential links (generated images/videos, job status). When unset, derived from X-Forwarded-Proto/Host or Forwarded headers." group:"api"`
|
||||
AuthAdminEmail string `env:"LOCALAI_ADMIN_EMAIL" help:"Email address to auto-promote to admin role" group:"auth"`
|
||||
AuthRegistrationMode string `env:"LOCALAI_REGISTRATION_MODE" default:"open" help:"Registration mode: 'open' (default), 'approval', or 'invite' (invite code required)" group:"auth"`
|
||||
DisableLocalAuth bool `env:"LOCALAI_DISABLE_LOCAL_AUTH" default:"false" help:"Disable local email/password registration and login (use with OAuth/OIDC-only setups)" group:"auth"`
|
||||
@@ -181,6 +181,8 @@ type RunCMD struct {
|
||||
// Cloud-proxy MITM listener (off by default).
|
||||
MITMListen string `env:"LOCALAI_MITM_LISTEN" help:"Address (host:port) for the cloudproxy MITM listener. Empty = disabled. Clients set HTTPS_PROXY=http://<this>:<port>. Intercept hosts are declared per-model via the model YAML mitm.hosts: block; create one from the Add Model UI." group:"middleware"`
|
||||
MITMCADir string `env:"LOCALAI_MITM_CA_DIR" type:"path" help:"Directory holding the MITM proxy CA cert + key. Defaults to <data-path>/mitm-ca." group:"middleware"`
|
||||
|
||||
PIIDefaultDetectors []string `env:"LOCALAI_PII_DEFAULT_DETECTORS" help:"Instance-wide default PII/secret detector model names applied to any PII-enabled model (chiefly cloud-proxy / MITM models) that names no pii.detectors of its own. Comma-separated, e.g. privacy-filter-nemotron,secret-filter. Takes precedence over the value persisted via the Middleware UI." group:"middleware"`
|
||||
}
|
||||
|
||||
func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
@@ -243,6 +245,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
config.WithAPIAddress(r.Address),
|
||||
config.WithMITMListen(r.MITMListen),
|
||||
config.WithMITMCADir(r.MITMCADir),
|
||||
config.WithPIIDefaultDetectors(r.PIIDefaultDetectors),
|
||||
config.WithAgentJobRetentionDays(r.AgentJobRetentionDays),
|
||||
config.WithLlamaCPPTunnelCallback(func(tunnels []string) {
|
||||
tunnelEnvVar := strings.Join(tunnels, ",")
|
||||
@@ -500,9 +503,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
opts = append(opts, config.WithAuthOIDCClientID(r.OIDCClientID))
|
||||
opts = append(opts, config.WithAuthOIDCClientSecret(r.OIDCClientSecret))
|
||||
}
|
||||
if r.AuthBaseURL != "" {
|
||||
opts = append(opts, config.WithAuthBaseURL(r.AuthBaseURL))
|
||||
}
|
||||
if r.AuthAdminEmail != "" {
|
||||
opts = append(opts, config.WithAuthAdminEmail(r.AuthAdminEmail))
|
||||
}
|
||||
@@ -520,6 +520,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Applied unconditionally: the external base URL governs all self-referential
|
||||
// links (not just OAuth callbacks), so it must take effect even when auth is off.
|
||||
if r.ExternalBaseURL != "" {
|
||||
opts = append(opts, config.WithExternalBaseURL(r.ExternalBaseURL))
|
||||
}
|
||||
|
||||
if idleWatchDog || busyWatchDog {
|
||||
opts = append(opts, config.EnableWatchDog)
|
||||
if idleWatchDog {
|
||||
|
||||
@@ -49,6 +49,13 @@ type ApplicationConfig struct {
|
||||
P2PNetworkID string
|
||||
Federated bool
|
||||
|
||||
// ExternalBaseURL is the externally visible base URL of this instance
|
||||
// (scheme+host[:port]), set via LOCALAI_BASE_URL. When non-empty it is
|
||||
// authoritative for every self-referential URL LocalAI emits (OAuth
|
||||
// callbacks, generated image/video links, async job StatusURLs),
|
||||
// overriding proxy-header detection. Empty = derive from request headers.
|
||||
ExternalBaseURL string
|
||||
|
||||
// DisableStats turns off per-request token tracking. By default the
|
||||
// routing module's billing recorder runs in every mode (including
|
||||
// no-auth single-user) so dashboards and `/api/usage` are immediately
|
||||
@@ -196,7 +203,6 @@ type AuthConfig struct {
|
||||
OIDCIssuer string // OIDC issuer URL for auto-discovery (e.g. https://accounts.google.com)
|
||||
OIDCClientID string
|
||||
OIDCClientSecret string
|
||||
BaseURL string // for OAuth callback URLs (e.g. "http://localhost:8080")
|
||||
AdminEmail string // auto-promote to admin on login
|
||||
RegistrationMode string // "open", "approval" (default when empty), "invite"
|
||||
DisableLocalAuth bool // disable local email/password registration and login
|
||||
@@ -712,6 +718,18 @@ func WithMITMCADir(dir string) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
// WithPIIDefaultDetectors sets the instance-wide default PII/secret detector
|
||||
// model names applied to any PII-enabled model (chiefly cloud-proxy / MITM
|
||||
// models) that names no pii.detectors of its own. CLI/env:
|
||||
// LOCALAI_PII_DEFAULT_DETECTORS. Empty leaves the value to
|
||||
// runtime_settings.json / the Middleware UI; a non-empty value takes
|
||||
// precedence over the file (env > file).
|
||||
func WithPIIDefaultDetectors(detectors []string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.PIIDefaultDetectors = detectors
|
||||
}
|
||||
}
|
||||
|
||||
func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.DynamicConfigsDir = dynamicConfigsDir
|
||||
@@ -938,9 +956,9 @@ func WithAuthGitHubClientSecret(clientSecret string) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithAuthBaseURL(baseURL string) AppOption {
|
||||
func WithExternalBaseURL(url string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.Auth.BaseURL = baseURL
|
||||
o.ExternalBaseURL = url
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@@ -9,6 +10,19 @@ import (
|
||||
"github.com/mudler/xlog"
|
||||
)
|
||||
|
||||
// HardwareDefaultsDisabled reports whether hardware auto-tuning is turned off via
|
||||
// LOCALAI_DISABLE_HARDWARE_DEFAULTS=true (mirrors LOCALAI_DISABLE_GUESSING). When
|
||||
// set, ApplyHardwareDefaults and the distributed router's node tuning are
|
||||
// skipped entirely, so the backend runs llama.cpp's stock batch/parallel
|
||||
// behavior — an escape hatch for users who want predictable, un-tuned defaults.
|
||||
func HardwareDefaultsDisabled() bool {
|
||||
// Read directly like the sibling LOCALAI_DISABLE_GUESSING toggle in
|
||||
// hooks_llamacpp.go: these config-layer heuristic switches run deep in the
|
||||
// defaults pipeline with no ApplicationConfig in scope to plumb through.
|
||||
//nolint:forbidigo // config-layer heuristic toggle, mirrors LOCALAI_DISABLE_GUESSING
|
||||
return os.Getenv("LOCALAI_DISABLE_HARDWARE_DEFAULTS") == "true"
|
||||
}
|
||||
|
||||
// Hardware-driven model-config defaults.
|
||||
//
|
||||
// This sits alongside the other config overriders (ApplyInferenceDefaults for
|
||||
@@ -54,8 +68,35 @@ func (g GPU) IsNVIDIABlackwell() bool {
|
||||
return maj >= 12
|
||||
}
|
||||
|
||||
// Compute-buffer headroom guard for the raised physical batch.
|
||||
//
|
||||
// Raising n_ubatch grows the CUDA *compute buffer* (the scratch for the forward
|
||||
// graph), which is allocated PER DEVICE — it does not benefit from a second GPU
|
||||
// the way weights or KV (which are split across devices) do. The buffer scales
|
||||
// ~linearly with n_ubatch * n_ctx, so a large context turns the GB10-tuned
|
||||
// ub2048 into multi-GiB of extra scratch that must fit on a SINGLE card. On a
|
||||
// 16 GiB consumer Blackwell with a 200k context that overflows (issue #10485),
|
||||
// even though the GB10 it was measured on (128 GiB unified memory) had room.
|
||||
//
|
||||
// These constants size a conservative guard: only raise the batch when the
|
||||
// extra scratch fits the per-device VRAM ceiling.
|
||||
const (
|
||||
// computeBufferBytesPerCell approximates the CUDA compute-buffer cost of one
|
||||
// (n_ubatch * n_ctx) cell. Derived from an observed allocation (ub2048 *
|
||||
// ctx204800 ~= 4.5 GiB => ~11 B/cell) and rounded up to 16 for margin, since
|
||||
// the real cost also grows with model width (heads / embedding dim) which we
|
||||
// don't know at config time.
|
||||
computeBufferBytesPerCell = 16
|
||||
// blackwellBatchHeadroomDivisor caps the extra compute buffer from raising the
|
||||
// physical batch at VRAM/divisor. /4 keeps the bulk of a device for weights +
|
||||
// KV, which already dominate VRAM use.
|
||||
blackwellBatchHeadroomDivisor = 4
|
||||
)
|
||||
|
||||
// PhysicalBatch returns the canonical physical batch (n_batch/n_ubatch) for the
|
||||
// given hardware, used when the model config leaves batch unset.
|
||||
// given hardware class, ignoring context/VRAM headroom. Use
|
||||
// PhysicalBatchForContext when a model context and per-device VRAM are known
|
||||
// (the load paths) so the raised batch can't overflow a single device.
|
||||
func PhysicalBatch(g GPU) int {
|
||||
if g.IsNVIDIABlackwell() {
|
||||
return BlackwellPhysicalBatch
|
||||
@@ -63,6 +104,51 @@ func PhysicalBatch(g GPU) int {
|
||||
return DefaultPhysicalBatch
|
||||
}
|
||||
|
||||
// PhysicalBatchForContext is PhysicalBatch gated on per-device VRAM headroom for
|
||||
// the given context: it only raises the batch above the conservative default
|
||||
// when the extra compute buffer (which is allocated on a single device and grows
|
||||
// with n_ubatch * n_ctx) fits within blackwellBatchHeadroomDivisor of the GPU's
|
||||
// VRAM. g.VRAM must be the PER-DEVICE ceiling (the smallest device on a
|
||||
// multi-GPU host), not the summed total — the compute buffer can't be split.
|
||||
//
|
||||
// VRAM 0 (unknown) stays conservative rather than risk a per-device OOM; the
|
||||
// GB10 / unified-memory path reports system RAM, so it still clears the guard.
|
||||
func PhysicalBatchForContext(g GPU, ctx int) int {
|
||||
if !g.IsNVIDIABlackwell() {
|
||||
return DefaultPhysicalBatch
|
||||
}
|
||||
if g.VRAM == 0 {
|
||||
return DefaultPhysicalBatch
|
||||
}
|
||||
if largeContextForDevice(g, ctx) {
|
||||
return DefaultPhysicalBatch
|
||||
}
|
||||
return BlackwellPhysicalBatch
|
||||
}
|
||||
|
||||
// largeContextForDevice reports whether the given context is large relative to
|
||||
// the per-device VRAM ceiling — the shared "tight single-model fit" signal that
|
||||
// suppresses BOTH throughput-oriented defaults (the Blackwell batch boost and
|
||||
// the concurrency slot count). It sizes the extra compute-buffer scratch a
|
||||
// raised batch would need at this context (which grows ~n_ubatch * n_ctx and
|
||||
// is allocated per device) and asks whether it overflows a fraction of the
|
||||
// device VRAM; when it does, the device has no headroom to spend on throughput
|
||||
// and the conservative defaults must hold (issue #10485).
|
||||
//
|
||||
// g.VRAM must be the PER-DEVICE ceiling (the smallest device on a multi-GPU
|
||||
// host). VRAM 0 (unknown) is treated as not-large so detection gaps don't
|
||||
// silently disable the defaults.
|
||||
func largeContextForDevice(g GPU, ctx int) bool {
|
||||
if g.VRAM == 0 {
|
||||
return false
|
||||
}
|
||||
if ctx <= 0 {
|
||||
ctx = DefaultContextSize
|
||||
}
|
||||
extra := uint64(ctx) * uint64(BlackwellPhysicalBatch-DefaultPhysicalBatch) * computeBufferBytesPerCell
|
||||
return extra > g.VRAM/blackwellBatchHeadroomDivisor
|
||||
}
|
||||
|
||||
// IsManagedPhysicalBatch reports whether n is a value PhysicalBatch assigns.
|
||||
// Callers that re-tune a value chosen by an upstream host (the distributed
|
||||
// router correcting the frontend's guess) use this to avoid clobbering an
|
||||
@@ -99,17 +185,50 @@ func DefaultParallelSlots(g GPU) int {
|
||||
}
|
||||
}
|
||||
|
||||
// EnsureParallelOption appends a VRAM-scaled "parallel:N" backend option when the
|
||||
// model doesn't already set one (and the GPU warrants concurrency). Returns the
|
||||
// possibly-extended options. Shared by the single-host config path
|
||||
// (ApplyHardwareDefaults) and the distributed router (per selected node).
|
||||
func EnsureParallelOption(opts []string, gpu GPU) []string {
|
||||
if slots := DefaultParallelSlots(gpu); slots > 1 && !hasParallelOption(opts) {
|
||||
// ParallelSlotsForContext is DefaultParallelSlots gated on per-device VRAM
|
||||
// headroom for the given context. A large context already claims most of a
|
||||
// single device's VRAM (the KV cache plus the per-slot compute/checkpoint
|
||||
// scratch that scales with n_seq_max), so defaulting multiple slots there
|
||||
// pushes a tight single-model fit into per-device CUDA OOM (issue #10485): the
|
||||
// model loads but the final allocation (e.g. an MTP draft context's KV cache)
|
||||
// overflows the tighter card by a few hundred MiB. Returns 1 (no concurrency)
|
||||
// in that tight regime, otherwise the VRAM-scaled DefaultParallelSlots.
|
||||
//
|
||||
// g.VRAM must be the PER-DEVICE ceiling (smallest device on a multi-GPU host).
|
||||
// It shares largeContextForDevice with the batch boost so both throughput
|
||||
// defaults are suppressed together; the GB10 / unified-memory path reports
|
||||
// system RAM and so keeps full concurrency even at large contexts.
|
||||
func ParallelSlotsForContext(g GPU, ctx int) int {
|
||||
slots := DefaultParallelSlots(g)
|
||||
if slots <= 1 || g.VRAM == 0 {
|
||||
return slots
|
||||
}
|
||||
if largeContextForDevice(g, ctx) {
|
||||
return 1
|
||||
}
|
||||
return slots
|
||||
}
|
||||
|
||||
// EnsureParallelOptionForContext appends a VRAM-scaled "parallel:N" backend
|
||||
// option when the model doesn't already set one and the GPU warrants (and has
|
||||
// headroom for) concurrency at this context. Returns the possibly-extended
|
||||
// options. Shared by the single-host config path (ApplyHardwareDefaults) and
|
||||
// the distributed router (per selected node).
|
||||
func EnsureParallelOptionForContext(opts []string, gpu GPU, ctx int) []string {
|
||||
if slots := ParallelSlotsForContext(gpu, ctx); slots > 1 && !hasParallelOption(opts) {
|
||||
return append(opts, fmt.Sprintf("parallel:%d", slots))
|
||||
}
|
||||
return opts
|
||||
}
|
||||
|
||||
// EnsureParallelOption is EnsureParallelOptionForContext with no known context
|
||||
// (defaults to DefaultContextSize, which clears the headroom gate on any device
|
||||
// large enough to warrant concurrency). Kept for callers without a model
|
||||
// context.
|
||||
func EnsureParallelOption(opts []string, gpu GPU) []string {
|
||||
return EnsureParallelOptionForContext(opts, gpu, 0)
|
||||
}
|
||||
|
||||
// hasParallelOption reports whether the model already sets parallel/n_parallel
|
||||
// so we never override an explicit value (helper shared with serving_defaults.go).
|
||||
func hasParallelOption(opts []string) bool {
|
||||
@@ -122,7 +241,12 @@ func hasParallelOption(opts []string) bool {
|
||||
// deterministic device — detection does a live nvidia-smi call.
|
||||
var localGPU = func() GPU {
|
||||
vendor, _ := xsysinfo.DetectGPUVendor()
|
||||
vram, _ := xsysinfo.TotalAvailableVRAM()
|
||||
// Use the SMALLEST device's VRAM, not the summed total: the parallel-slot
|
||||
// tier and the batch headroom guard both reason about what fits on a single
|
||||
// card, and per-device compute buffers can't be split across GPUs. Summing
|
||||
// two 16 GiB cards into "32 GiB" is what over-provisioned multi-GPU hosts
|
||||
// into OOM (issue #10485).
|
||||
vram, _ := xsysinfo.MinPerGPUVRAM()
|
||||
return GPU{
|
||||
Vendor: vendor,
|
||||
ComputeCapability: xsysinfo.NVIDIAComputeCapability(),
|
||||
@@ -134,25 +258,36 @@ var localGPU = func() GPU {
|
||||
// and were left unset by the user. Currently: a larger physical batch on
|
||||
// Blackwell. Explicit config always wins (we only touch zero values).
|
||||
func ApplyHardwareDefaults(cfg *ModelConfig, gpu GPU) {
|
||||
if cfg == nil {
|
||||
if cfg == nil || HardwareDefaultsDisabled() {
|
||||
return
|
||||
}
|
||||
if cfg.Batch == 0 && gpu.IsNVIDIABlackwell() {
|
||||
cfg.Batch = BlackwellPhysicalBatch
|
||||
xlog.Debug("[hardware_defaults] Blackwell GPU: defaulting physical batch",
|
||||
"batch", cfg.Batch, "compute_cap", gpu.ComputeCapability)
|
||||
// Raise the physical batch on Blackwell only when the resulting compute
|
||||
// buffer fits the per-device VRAM at THIS model's context. Leaving Batch at 0
|
||||
// (rather than writing the default 512) preserves the downstream single-pass
|
||||
// sizing in core/backend.EffectiveBatchSize for embedding/score/rerank.
|
||||
ctx := DefaultContextSize
|
||||
if cfg.ContextSize != nil {
|
||||
ctx = *cfg.ContextSize
|
||||
}
|
||||
if cfg.Batch == 0 {
|
||||
if PhysicalBatchForContext(gpu, ctx) == BlackwellPhysicalBatch {
|
||||
cfg.Batch = BlackwellPhysicalBatch
|
||||
xlog.Debug("[hardware_defaults] Blackwell GPU: defaulting physical batch",
|
||||
"batch", cfg.Batch, "compute_cap", gpu.ComputeCapability, "context", ctx, "vram_gib", gpu.VRAM>>30)
|
||||
}
|
||||
}
|
||||
|
||||
// Enable concurrent serving by default on a capable GPU: without this the
|
||||
// llama.cpp backend runs n_parallel=1 and serializes multi-user requests
|
||||
// (continuous batching stays off). Unified KV means the slots share the
|
||||
// context budget, so this is concurrency without extra KV memory. Explicit
|
||||
// parallel/n_parallel in the model options always wins.
|
||||
// context budget, but a context large enough to fill a single device leaves
|
||||
// no room for the per-slot scratch, so the slot count is gated on per-device
|
||||
// headroom too (issue #10485). Explicit parallel/n_parallel always wins.
|
||||
if before := len(cfg.Options); true {
|
||||
cfg.Options = EnsureParallelOption(cfg.Options, gpu)
|
||||
cfg.Options = EnsureParallelOptionForContext(cfg.Options, gpu, ctx)
|
||||
if len(cfg.Options) > before {
|
||||
xlog.Debug("[hardware_defaults] defaulting parallel slots for concurrent serving",
|
||||
"option", cfg.Options[len(cfg.Options)-1], "vram_gib", gpu.VRAM>>30)
|
||||
"option", cfg.Options[len(cfg.Options)-1], "context", ctx, "vram_gib", gpu.VRAM>>30)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,26 +9,37 @@ import (
|
||||
// GPU. The detection seam (localGPU) is injected so the path is deterministic
|
||||
// without a real GPU.
|
||||
var _ = Describe("SetDefaults hardware defaults (single-instance)", func() {
|
||||
const gib = uint64(1) << 30
|
||||
|
||||
var orig func() GPU
|
||||
BeforeEach(func() { orig = localGPU })
|
||||
AfterEach(func() { localGPU = orig })
|
||||
|
||||
It("sets the physical batch on a local Blackwell GPU", func() {
|
||||
localGPU = func() GPU { return GPU{ComputeCapability: "12.1"} }
|
||||
It("sets the physical batch on a local Blackwell GPU with headroom", func() {
|
||||
localGPU = func() GPU { return GPU{ComputeCapability: "12.1", VRAM: 119 * gib} }
|
||||
cfg := &ModelConfig{}
|
||||
cfg.SetDefaults()
|
||||
Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
|
||||
})
|
||||
|
||||
It("leaves batch unset when a large context would overflow the device", func() {
|
||||
// Regression guard for issue #10485: 16 GiB consumer Blackwell + ~200k ctx.
|
||||
localGPU = func() GPU { return GPU{ComputeCapability: "12.0", VRAM: 16 * gib} }
|
||||
ctx := 204800
|
||||
cfg := &ModelConfig{LLMConfig: LLMConfig{ContextSize: &ctx}}
|
||||
cfg.SetDefaults()
|
||||
Expect(cfg.Batch).To(Equal(0))
|
||||
})
|
||||
|
||||
It("leaves batch unset on a non-Blackwell local GPU", func() {
|
||||
localGPU = func() GPU { return GPU{ComputeCapability: "8.9"} }
|
||||
localGPU = func() GPU { return GPU{ComputeCapability: "8.9", VRAM: 119 * gib} }
|
||||
cfg := &ModelConfig{}
|
||||
cfg.SetDefaults()
|
||||
Expect(cfg.Batch).To(Equal(0))
|
||||
})
|
||||
|
||||
It("never overrides an explicit batch", func() {
|
||||
localGPU = func() GPU { return GPU{ComputeCapability: "12.1"} }
|
||||
localGPU = func() GPU { return GPU{ComputeCapability: "12.1", VRAM: 119 * gib} }
|
||||
cfg := &ModelConfig{}
|
||||
cfg.Batch = 1024
|
||||
cfg.SetDefaults()
|
||||
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
)
|
||||
|
||||
var _ = Describe("Hardware-driven config defaults", func() {
|
||||
const gib = uint64(1) << 30
|
||||
|
||||
DescribeTable("GPU.IsNVIDIABlackwell (sm_12x consumer family)",
|
||||
func(cc string, want bool) {
|
||||
Expect(GPU{ComputeCapability: cc}.IsNVIDIABlackwell()).To(Equal(want))
|
||||
@@ -35,29 +37,69 @@ var _ = Describe("Hardware-driven config defaults", func() {
|
||||
})
|
||||
})
|
||||
|
||||
Describe("PhysicalBatchForContext (per-device VRAM headroom)", func() {
|
||||
It("raises the batch when the compute buffer fits the device", func() {
|
||||
// 16 GiB Blackwell with a small context: the extra scratch is tiny.
|
||||
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.0", VRAM: 16 * gib}, 8192)).
|
||||
To(Equal(BlackwellPhysicalBatch))
|
||||
})
|
||||
It("keeps the default batch when a large context would overflow one device", func() {
|
||||
// The issue #10485 case: 16 GiB consumer Blackwell, ~200k context.
|
||||
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.0", VRAM: 16 * gib}, 204800)).
|
||||
To(Equal(DefaultPhysicalBatch))
|
||||
})
|
||||
It("still raises the batch on a large unified-memory device (GB10)", func() {
|
||||
// GB10 reports system RAM (~119 GiB) as its single device's VRAM.
|
||||
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.1", VRAM: 119 * gib}, 204800)).
|
||||
To(Equal(BlackwellPhysicalBatch))
|
||||
})
|
||||
It("stays conservative when VRAM is unknown", func() {
|
||||
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "12.1"}, 8192)).
|
||||
To(Equal(DefaultPhysicalBatch))
|
||||
})
|
||||
It("never raises the batch on non-Blackwell", func() {
|
||||
Expect(PhysicalBatchForContext(GPU{ComputeCapability: "9.0", VRAM: 80 * gib}, 8192)).
|
||||
To(Equal(DefaultPhysicalBatch))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("ApplyHardwareDefaults", func() {
|
||||
It("raises an unset batch to 2048 on Blackwell", func() {
|
||||
It("raises an unset batch to 2048 on Blackwell with headroom", func() {
|
||||
cfg := &ModelConfig{}
|
||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"})
|
||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1", VRAM: 119 * gib})
|
||||
Expect(cfg.Batch).To(Equal(BlackwellPhysicalBatch))
|
||||
})
|
||||
It("leaves batch unset when a large context would overflow one device", func() {
|
||||
// Regression guard for issue #10485: 16 GiB card + ~200k context.
|
||||
ctx := 204800
|
||||
cfg := &ModelConfig{LLMConfig: LLMConfig{ContextSize: &ctx}}
|
||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.0", VRAM: 16 * gib})
|
||||
Expect(cfg.Batch).To(Equal(0))
|
||||
})
|
||||
It("leaves batch unset on non-Blackwell", func() {
|
||||
cfg := &ModelConfig{}
|
||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "9.0"})
|
||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "9.0", VRAM: 119 * gib})
|
||||
Expect(cfg.Batch).To(Equal(0))
|
||||
})
|
||||
It("never overrides an explicit batch", func() {
|
||||
cfg := &ModelConfig{}
|
||||
cfg.Batch = 1024
|
||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1"})
|
||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1", VRAM: 119 * gib})
|
||||
Expect(cfg.Batch).To(Equal(1024))
|
||||
})
|
||||
It("no-ops on nil", func() {
|
||||
Expect(func() { ApplyHardwareDefaults(nil, GPU{ComputeCapability: "12.1"}) }).ToNot(Panic())
|
||||
})
|
||||
})
|
||||
|
||||
const gib = uint64(1) << 30
|
||||
It("applies nothing when hardware defaults are disabled via env", func() {
|
||||
GinkgoT().Setenv("LOCALAI_DISABLE_HARDWARE_DEFAULTS", "true")
|
||||
Expect(HardwareDefaultsDisabled()).To(BeTrue())
|
||||
cfg := &ModelConfig{}
|
||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1", VRAM: 119 * gib})
|
||||
Expect(cfg.Batch).To(Equal(0))
|
||||
Expect(cfg.Options).To(BeEmpty())
|
||||
})
|
||||
})
|
||||
|
||||
DescribeTable("DefaultParallelSlots (by VRAM)",
|
||||
func(vramGiB uint64, want int) {
|
||||
@@ -72,12 +114,46 @@ var _ = Describe("Hardware-driven config defaults", func() {
|
||||
Entry("unknown 0", uint64(0), 1),
|
||||
)
|
||||
|
||||
Describe("ParallelSlotsForContext (per-device VRAM headroom)", func() {
|
||||
It("keeps the VRAM-scaled slot count when the context fits the device", func() {
|
||||
// 16 GiB card, small context: plenty of room for concurrency.
|
||||
Expect(ParallelSlotsForContext(GPU{VRAM: 16 * gib}, 8192)).To(Equal(4))
|
||||
})
|
||||
It("drops to a single slot when a large context already fills the device", func() {
|
||||
// Regression guard for issue #10485: 16 GiB consumer Blackwell, ~200k
|
||||
// context. Even with unified KV, the per-slot compute/checkpoint
|
||||
// scratch from 4 slots is the straw that overflows the tighter device.
|
||||
Expect(ParallelSlotsForContext(GPU{VRAM: 16 * gib}, 204800)).To(Equal(1))
|
||||
})
|
||||
It("keeps concurrency on a large unified-memory device (GB10)", func() {
|
||||
// GB10 reports system RAM (~119 GiB): a 200k context leaves headroom.
|
||||
Expect(ParallelSlotsForContext(GPU{VRAM: 119 * gib}, 204800)).To(Equal(8))
|
||||
})
|
||||
It("keeps concurrency on a big datacenter card with a large context", func() {
|
||||
// 80 GiB A100: 200k context is a small fraction, concurrency stays.
|
||||
Expect(ParallelSlotsForContext(GPU{VRAM: 80 * gib}, 204800)).To(Equal(8))
|
||||
})
|
||||
It("stays a single slot on small/unknown VRAM regardless of context", func() {
|
||||
Expect(ParallelSlotsForContext(GPU{VRAM: 2 * gib}, 8192)).To(Equal(1))
|
||||
Expect(ParallelSlotsForContext(GPU{}, 8192)).To(Equal(1))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("ApplyHardwareDefaults parallel slots", func() {
|
||||
It("adds a VRAM-scaled parallel option on a capable GPU", func() {
|
||||
cfg := &ModelConfig{}
|
||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.1", VRAM: 119 * gib})
|
||||
Expect(cfg.Options).To(ContainElement("parallel:8"))
|
||||
})
|
||||
It("adds no parallel option when a large context already fills one device", func() {
|
||||
// Regression guard for issue #10485: 16 GiB card + ~200k context. The
|
||||
// model barely fits; defaulting concurrency tips the tighter GPU into
|
||||
// CUDA OOM during the final (MTP draft) KV allocation.
|
||||
ctx := 204800
|
||||
cfg := &ModelConfig{LLMConfig: LLMConfig{ContextSize: &ctx}}
|
||||
ApplyHardwareDefaults(cfg, GPU{ComputeCapability: "12.0", VRAM: 16 * gib})
|
||||
Expect(cfg.Options).ToNot(ContainElement(ContainSubstring("parallel")))
|
||||
})
|
||||
It("scales the slot count down with VRAM", func() {
|
||||
cfg := &ModelConfig{}
|
||||
ApplyHardwareDefaults(cfg, GPU{VRAM: 24 * gib})
|
||||
|
||||
@@ -1204,11 +1204,6 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
// This ensures gallery-installed and runtime-loaded models get optimal parameters.
|
||||
ApplyInferenceDefaults(cfg, cfg.Name, cfg.Model)
|
||||
|
||||
// Apply hardware-driven defaults (e.g. a larger physical batch on Blackwell).
|
||||
// Uses the local GPU here; in distributed mode the router re-applies the same
|
||||
// heuristics for the selected node's GPU before loading. Explicit config wins.
|
||||
ApplyHardwareDefaults(cfg, localGPU())
|
||||
|
||||
// Apply serving-policy defaults (device-independent): cross-request prefix
|
||||
// caching. Propagates to distributed nodes via the model options.
|
||||
ApplyServingDefaults(cfg)
|
||||
@@ -1247,6 +1242,16 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
cfg.ContextSize = &ctx
|
||||
}
|
||||
runBackendHooks(cfg, lo.modelPath)
|
||||
|
||||
// Apply hardware-driven defaults (e.g. a larger physical batch on Blackwell)
|
||||
// LAST, after the context size is fully resolved (explicit config, LoadOptions,
|
||||
// then the GGUF guess inside runBackendHooks): the Blackwell batch guard sizes
|
||||
// the per-device compute buffer against this model's context, so it must see
|
||||
// the final value, not a pre-guess nil. Uses the local GPU here; in distributed
|
||||
// mode the router re-applies the same heuristics for the selected node's GPU
|
||||
// before loading. Explicit config always wins.
|
||||
ApplyHardwareDefaults(cfg, localGPU())
|
||||
|
||||
cfg.syncKnownUsecasesFromString()
|
||||
}
|
||||
|
||||
|
||||
@@ -149,6 +149,18 @@ func API(application *application.Application) (*echo.Echo, error) {
|
||||
// Middleware - StripPathPrefix must be registered early as it uses Rewrite which runs before routing
|
||||
e.Pre(httpMiddleware.StripPathPrefix())
|
||||
|
||||
// Stamp the configured external base URL into each request context so
|
||||
// middleware.BaseURL can treat it as authoritative for self-referential
|
||||
// links. Registered as Pre so it runs before routing and handlers.
|
||||
if extBaseURL := application.ApplicationConfig().ExternalBaseURL; extBaseURL != "" {
|
||||
e.Pre(func(next echo.HandlerFunc) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
c.Set("_external_base_url", extBaseURL)
|
||||
return next(c)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
e.Pre(middleware.RemoveTrailingSlash())
|
||||
|
||||
if application.ApplicationConfig().MachineTag != "" {
|
||||
|
||||
@@ -3,10 +3,51 @@
|
||||
package auth
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"gorm.io/driver/sqlite"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
func openSQLiteDialector(path string) (gorm.Dialector, error) {
|
||||
return sqlite.Open(path), nil
|
||||
return sqlite.Open(buildSQLiteDSN(path)), nil
|
||||
}
|
||||
|
||||
// buildSQLiteDSN augments a SQLite file path with connection pragmas that make
|
||||
// the auth DB resilient on slow or contended storage.
|
||||
//
|
||||
// - _busy_timeout=5000 makes SQLite retry for up to 5s on SQLITE_BUSY instead
|
||||
// of failing immediately. Network-backed storage (SMB/CIFS/NFS, e.g. Azure
|
||||
// Files) is prone to transient lock contention during migration (see #10506).
|
||||
// - _txlock=immediate takes the write lock at BEGIN, avoiding deadlocks when a
|
||||
// read transaction later upgrades to a write during AutoMigrate.
|
||||
//
|
||||
// We deliberately do NOT set WAL journal mode: WAL relies on a shared-memory
|
||||
// mmap that does not work over SMB/NFS, which is exactly the failing case here.
|
||||
//
|
||||
// Caller-supplied values for either pragma are preserved.
|
||||
func buildSQLiteDSN(path string) string {
|
||||
base := path
|
||||
rawQuery := ""
|
||||
if i := strings.IndexByte(path, '?'); i >= 0 {
|
||||
base = path[:i]
|
||||
rawQuery = path[i+1:]
|
||||
}
|
||||
|
||||
values, err := url.ParseQuery(rawQuery)
|
||||
if err != nil {
|
||||
// An unparseable query string means a hand-crafted DSN we should not
|
||||
// risk corrupting; leave it untouched.
|
||||
return path
|
||||
}
|
||||
|
||||
if values.Get("_busy_timeout") == "" {
|
||||
values.Set("_busy_timeout", "5000")
|
||||
}
|
||||
if values.Get("_txlock") == "" {
|
||||
values.Set("_txlock", "immediate")
|
||||
}
|
||||
|
||||
return base + "?" + values.Encode()
|
||||
}
|
||||
|
||||
57
core/http/auth/db_sqlite_test.go
Normal file
57
core/http/auth/db_sqlite_test.go
Normal file
@@ -0,0 +1,57 @@
|
||||
//go:build auth
|
||||
|
||||
package auth
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// parseDSN splits a "base?query" DSN into its base and decoded query values so
|
||||
// assertions don't depend on url.Values.Encode()'s key ordering.
|
||||
func parseDSN(dsn string) (string, url.Values) {
|
||||
base := dsn
|
||||
rawQuery := ""
|
||||
if i := strings.IndexByte(dsn, '?'); i >= 0 {
|
||||
base = dsn[:i]
|
||||
rawQuery = dsn[i+1:]
|
||||
}
|
||||
values, err := url.ParseQuery(rawQuery)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
return base, values
|
||||
}
|
||||
|
||||
var _ = Describe("buildSQLiteDSN", func() {
|
||||
It("adds busy_timeout and txlock to a plain file path", func() {
|
||||
base, values := parseDSN(buildSQLiteDSN("/data/database.db"))
|
||||
Expect(base).To(Equal("/data/database.db"))
|
||||
Expect(values.Get("_busy_timeout")).To(Equal("5000"))
|
||||
Expect(values.Get("_txlock")).To(Equal("immediate"))
|
||||
})
|
||||
|
||||
It("adds pragmas to an in-memory database", func() {
|
||||
base, values := parseDSN(buildSQLiteDSN(":memory:"))
|
||||
Expect(base).To(Equal(":memory:"))
|
||||
Expect(values.Get("_busy_timeout")).To(Equal("5000"))
|
||||
Expect(values.Get("_txlock")).To(Equal("immediate"))
|
||||
})
|
||||
|
||||
It("preserves an existing query string", func() {
|
||||
base, values := parseDSN(buildSQLiteDSN("/data/database.db?cache=shared"))
|
||||
Expect(base).To(Equal("/data/database.db"))
|
||||
Expect(values.Get("cache")).To(Equal("shared"))
|
||||
Expect(values.Get("_busy_timeout")).To(Equal("5000"))
|
||||
Expect(values.Get("_txlock")).To(Equal("immediate"))
|
||||
})
|
||||
|
||||
It("does not override a caller-supplied busy_timeout or txlock", func() {
|
||||
_, values := parseDSN(buildSQLiteDSN("/data/database.db?_busy_timeout=1000&_txlock=deferred"))
|
||||
Expect(values["_busy_timeout"]).To(HaveLen(1), "_busy_timeout should not be duplicated")
|
||||
Expect(values.Get("_busy_timeout")).To(Equal("1000"))
|
||||
Expect(values["_txlock"]).To(HaveLen(1), "_txlock should not be duplicated")
|
||||
Expect(values.Get("_txlock")).To(Equal("deferred"))
|
||||
})
|
||||
})
|
||||
@@ -70,7 +70,7 @@ func UploadToCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
name := c.Param("name")
|
||||
name := decodedParam(c, "name")
|
||||
file, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": "file required"})
|
||||
@@ -116,7 +116,7 @@ func ListCollectionEntriesEndpoint(app *application.Application) echo.HandlerFun
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
entries, err := svc.ListCollectionEntriesForUser(userID, c.Param("name"))
|
||||
entries, err := svc.ListCollectionEntriesForUser(userID, decodedParam(c, "name"))
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -139,7 +139,7 @@ func GetCollectionEntryContentEndpoint(app *application.Application) echo.Handle
|
||||
if err != nil {
|
||||
entry = entryParam
|
||||
}
|
||||
content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, c.Param("name"), entry)
|
||||
content, chunkCount, err := svc.GetCollectionEntryContentForUser(userID, decodedParam(c, "name"), entry)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -164,7 +164,7 @@ func SearchCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
if err := c.Bind(&payload); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
}
|
||||
results, err := svc.SearchCollectionForUser(userID, c.Param("name"), payload.Query, payload.MaxResults)
|
||||
results, err := svc.SearchCollectionForUser(userID, decodedParam(c, "name"), payload.Query, payload.MaxResults)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -182,7 +182,7 @@ func ResetCollectionEndpoint(app *application.Application) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
if err := svc.ResetCollectionForUser(userID, c.Param("name")); err != nil {
|
||||
if err := svc.ResetCollectionForUser(userID, decodedParam(c, "name")); err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -202,7 +202,7 @@ func DeleteCollectionEntryEndpoint(app *application.Application) echo.HandlerFun
|
||||
if err := c.Bind(&payload); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
}
|
||||
remaining, err := svc.DeleteCollectionEntryForUser(userID, c.Param("name"), payload.Entry)
|
||||
remaining, err := svc.DeleteCollectionEntryForUser(userID, decodedParam(c, "name"), payload.Entry)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -230,7 +230,7 @@ func AddCollectionSourceEndpoint(app *application.Application) echo.HandlerFunc
|
||||
if payload.UpdateInterval < 1 {
|
||||
payload.UpdateInterval = 60
|
||||
}
|
||||
if err := svc.AddCollectionSourceForUser(userID, c.Param("name"), payload.URL, payload.UpdateInterval); err != nil {
|
||||
if err := svc.AddCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL, payload.UpdateInterval); err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
}
|
||||
@@ -250,7 +250,7 @@ func RemoveCollectionSourceEndpoint(app *application.Application) echo.HandlerFu
|
||||
if err := c.Bind(&payload); err != nil {
|
||||
return c.JSON(http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
}
|
||||
if err := svc.RemoveCollectionSourceForUser(userID, c.Param("name"), payload.URL); err != nil {
|
||||
if err := svc.RemoveCollectionSourceForUser(userID, decodedParam(c, "name"), payload.URL); err != nil {
|
||||
return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
|
||||
}
|
||||
return c.JSON(http.StatusOK, map[string]string{"status": "ok"})
|
||||
@@ -267,7 +267,7 @@ func GetCollectionEntryRawFileEndpoint(app *application.Application) echo.Handle
|
||||
if err != nil {
|
||||
entry = entryParam
|
||||
}
|
||||
fpath, err := svc.GetCollectionEntryFilePathForUser(userID, c.Param("name"), entry)
|
||||
fpath, err := svc.GetCollectionEntryFilePathForUser(userID, decodedParam(c, "name"), entry)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
@@ -282,7 +282,7 @@ func ListCollectionSourcesEndpoint(app *application.Application) echo.HandlerFun
|
||||
return func(c echo.Context) error {
|
||||
svc := app.AgentPoolService()
|
||||
userID := effectiveUserID(c)
|
||||
sources, err := svc.ListCollectionSourcesForUser(userID, c.Param("name"))
|
||||
sources, err := svc.ListCollectionSourcesForUser(userID, decodedParam(c, "name"))
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "not found") {
|
||||
return c.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user