mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-16 12:59:33 -04:00
feat(sam.cpp): add sam.cpp detection backend (#9288)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
13a6ed709c
commit
706cf5d43c
105
.github/workflows/backend.yml
vendored
105
.github/workflows/backend.yml
vendored
@@ -574,6 +574,19 @@ jobs:
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "8"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-sam3-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "sam3-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "8"
|
||||
@@ -1147,6 +1160,32 @@ jobs:
|
||||
backend: "stablediffusion-ggml"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-sam3-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "sam3-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'false'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-cuda-13-arm64-sam3-cpp'
|
||||
base-image: "ubuntu:24.04"
|
||||
ubuntu-version: '2404'
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
backend: "sam3-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
@@ -1907,6 +1946,59 @@ jobs:
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
# sam3-cpp
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-sam3-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "sam3-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'sycl_f32'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-intel-sycl-f32-sam3-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "sam3-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'sycl_f16'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-intel-sycl-f16-sam3-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "sam3-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'vulkan'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-vulkan-sam3-cpp'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "sam3-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'sycl_f32'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
@@ -1959,6 +2051,19 @@ jobs:
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2204'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'false'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64-sam3-cpp'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
backend: "sam3-cpp"
|
||||
dockerfile: "./backend/Dockerfile.golang"
|
||||
context: "./"
|
||||
ubuntu-version: '2204'
|
||||
# whisper
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
|
||||
4
.github/workflows/bump_deps.yaml
vendored
4
.github/workflows/bump_deps.yaml
vendored
@@ -34,6 +34,10 @@ jobs:
|
||||
variable: "ACESTEP_CPP_VERSION"
|
||||
branch: "master"
|
||||
file: "backend/go/acestep-cpp/Makefile"
|
||||
- repository: "PABannier/sam3.cpp"
|
||||
variable: "SAM3_VERSION"
|
||||
branch: "main"
|
||||
file: "backend/go/sam3-cpp/Makefile"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
8
Makefile
8
Makefile
@@ -1,5 +1,5 @@
|
||||
# Disable parallel execution for backend builds
|
||||
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros
|
||||
.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp
|
||||
|
||||
GOCMD=go
|
||||
GOTEST=$(GOCMD) test
|
||||
@@ -593,6 +593,9 @@ BACKEND_LLAMA_CPP_QUANTIZATION = llama-cpp-quantization|python|.|false|true
|
||||
# Rust backends
|
||||
BACKEND_KOKOROS = kokoros|rust|.|false|true
|
||||
|
||||
# C++ backends (Go wrapper with purego)
|
||||
BACKEND_SAM3_CPP = sam3-cpp|golang|.|false|true
|
||||
|
||||
# Helper function to build docker image for a backend
|
||||
# Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG)
|
||||
define docker-build-backend
|
||||
@@ -652,12 +655,13 @@ $(eval $(call generate-docker-build-target,$(BACKEND_MLX_DISTRIBUTED)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_TRL)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP_QUANTIZATION)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_KOKOROS)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_SAM3_CPP)))
|
||||
|
||||
# Pattern rule for docker-save targets
|
||||
docker-save-%: backend-images
|
||||
docker save local-ai-backend:$* -o backend-images/$*.tar
|
||||
|
||||
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-kokoros
|
||||
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-kokoros docker-build-sam3-cpp
|
||||
|
||||
########################################################
|
||||
### Mock Backend for E2E Tests
|
||||
|
||||
@@ -444,6 +444,10 @@ message Message {
|
||||
|
||||
message DetectOptions {
|
||||
string src = 1;
|
||||
string prompt = 2; // Text prompt (for SAM 3 PCS mode)
|
||||
repeated float points = 3; // Point coordinates as [x1, y1, label1, x2, y2, label2, ...] (label: 1=pos, 0=neg)
|
||||
repeated float boxes = 4; // Box coordinates as [x1, y1, x2, y2, ...]
|
||||
float threshold = 5; // Detection confidence threshold
|
||||
}
|
||||
|
||||
message Detection {
|
||||
@@ -453,6 +457,7 @@ message Detection {
|
||||
float height = 4;
|
||||
float confidence = 5;
|
||||
string class_name = 6;
|
||||
bytes mask = 7; // PNG-encoded binary segmentation mask
|
||||
}
|
||||
|
||||
message DetectResponse {
|
||||
|
||||
7
backend/go/sam3-cpp/.gitignore
vendored
Normal file
7
backend/go/sam3-cpp/.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
sources/
|
||||
build*/
|
||||
package/
|
||||
libgosam3*.so
|
||||
sam3-cpp
|
||||
test-models/
|
||||
test-data/
|
||||
26
backend/go/sam3-cpp/CMakeLists.txt
Normal file
26
backend/go/sam3-cpp/CMakeLists.txt
Normal file
@@ -0,0 +1,26 @@
|
||||
cmake_minimum_required(VERSION 3.14)
project(gosam3 LANGUAGES C CXX)

# The wrapper is loaded at runtime as a shared object, so everything that gets
# linked into it (including the static ggml/sam3 archives) must be PIC.
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Force static ggml/sam3 libraries so the resulting module carries no extra
# runtime .so dependencies, and skip the upstream examples and tests.
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build static libraries" FORCE)
set(SAM3_BUILD_EXAMPLES OFF CACHE BOOL "Disable sam3.cpp examples" FORCE)
set(SAM3_BUILD_TESTS OFF CACHE BOOL "Disable sam3.cpp tests" FORCE)

add_subdirectory(./sources/sam3.cpp)

# MODULE: a plugin-style library that is opened dynamically, never linked against.
add_library(gosam3 MODULE gosam3.cpp)

# Emit libgosam3.so at the top of the build tree, where the Makefile picks it up.
set_target_properties(gosam3 PROPERTIES
    CXX_STANDARD 14
    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
)

target_include_directories(gosam3 PUBLIC
    sources/sam3.cpp
    sources/sam3.cpp/ggml/include
)

target_link_libraries(gosam3 PRIVATE sam3 ggml)

# GCC releases before 9 ship std::filesystem in a separate support library.
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
    target_link_libraries(gosam3 PRIVATE stdc++fs)
endif()
|
||||
122
backend/go/sam3-cpp/Makefile
Normal file
122
backend/go/sam3-cpp/Makefile
Normal file
@@ -0,0 +1,122 @@
|
||||
CMAKE_ARGS?=
|
||||
BUILD_TYPE?=
|
||||
NATIVE?=false
|
||||
|
||||
GOCMD?=go
|
||||
GO_TAGS?=
|
||||
JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# sam3.cpp
|
||||
SAM3_REPO?=https://github.com/PABannier/sam3.cpp
|
||||
SAM3_VERSION?=8cc6e62bc740d7972746fcd47465ddf1c2b1e3c3
|
||||
|
||||
ifeq ($(NATIVE),false)
|
||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||
endif
|
||||
|
||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
||||
ifeq ($(BUILD_TYPE),cublas)
|
||||
CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||
else ifeq ($(BUILD_TYPE),openblas)
|
||||
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
ROCM_PATH ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
|
||||
else ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=ON
|
||||
else ifeq ($(OS),Darwin)
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
else
|
||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||
-DCMAKE_C_COMPILER=icx \
|
||||
-DCMAKE_CXX_COMPILER=icpx \
|
||||
-DGGML_SYCL_F16=ON
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f32)
|
||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||
-DCMAKE_C_COMPILER=icx \
|
||||
-DCMAKE_CXX_COMPILER=icpx
|
||||
endif
|
||||
|
||||
sources/sam3.cpp:
|
||||
git clone --recursive $(SAM3_REPO) sources/sam3.cpp && \
|
||||
cd sources/sam3.cpp && \
|
||||
git checkout $(SAM3_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
# Detect OS
|
||||
UNAME_S := $(shell uname -s)
|
||||
|
||||
# Only build CPU variants on Linux
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
VARIANT_TARGETS = libgosam3-avx.so libgosam3-avx2.so libgosam3-avx512.so libgosam3-fallback.so
|
||||
else
|
||||
# On non-Linux (e.g., Darwin), build only fallback variant
|
||||
VARIANT_TARGETS = libgosam3-fallback.so
|
||||
endif
|
||||
|
||||
sam3-cpp: main.go gosam3.go $(VARIANT_TARGETS)
|
||||
CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o sam3-cpp ./
|
||||
|
||||
# These targets name actions, not files. `package` in particular shares its
# name with the package/ directory that package.sh creates, so without .PHONY
# make may consider it up to date and skip re-packaging.
.PHONY: package build clean purge

# Assemble the distributable package/ directory from the built artifacts.
package: sam3-cpp
	bash package.sh

# Alias kept for callers that invoke `make build`.
build: package

# Remove every build product, including the cloned sources.
clean: purge
	rm -rf libgosam3*.so sam3-cpp package sources

# Remove only the per-variant CMake build directories.
purge:
	rm -rf build*
|
||||
|
||||
# Build all variants (Linux only)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
libgosam3-avx.so: sources/sam3.cpp
|
||||
$(MAKE) purge
|
||||
$(info ${GREEN}I sam3-cpp build info:avx${RESET})
|
||||
SO_TARGET=libgosam3-avx.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
|
||||
rm -rfv build*
|
||||
|
||||
libgosam3-avx2.so: sources/sam3.cpp
|
||||
$(MAKE) purge
|
||||
$(info ${GREEN}I sam3-cpp build info:avx2${RESET})
|
||||
SO_TARGET=libgosam3-avx2.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libgosam3-custom
|
||||
rm -rfv build*
|
||||
|
||||
libgosam3-avx512.so: sources/sam3.cpp
|
||||
$(MAKE) purge
|
||||
$(info ${GREEN}I sam3-cpp build info:avx512${RESET})
|
||||
SO_TARGET=libgosam3-avx512.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libgosam3-custom
|
||||
rm -rfv build*
|
||||
endif
|
||||
|
||||
# Build fallback variant (all platforms)
|
||||
libgosam3-fallback.so: sources/sam3.cpp
|
||||
$(MAKE) purge
|
||||
$(info ${GREEN}I sam3-cpp build info:fallback${RESET})
|
||||
SO_TARGET=libgosam3-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
|
||||
rm -rfv build*
|
||||
|
||||
libgosam3-custom: CMakeLists.txt gosam3.cpp gosam3.h
|
||||
mkdir -p build-$(SO_TARGET) && \
|
||||
cd build-$(SO_TARGET) && \
|
||||
cmake .. $(CMAKE_ARGS) && \
|
||||
cmake --build . --config Release -j$(JOBS) && \
|
||||
cd .. && \
|
||||
mv build-$(SO_TARGET)/libgosam3.so ./$(SO_TARGET)
|
||||
|
||||
all: sam3-cpp package
|
||||
193
backend/go/sam3-cpp/gosam3.cpp
Normal file
193
backend/go/sam3-cpp/gosam3.cpp
Normal file
@@ -0,0 +1,193 @@
|
||||
#include "sam3.h"
|
||||
#include "gosam3.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||
#define STB_IMAGE_WRITE_STATIC
|
||||
#include "stb_image_write.h"
|
||||
|
||||
// Static state
|
||||
static std::shared_ptr<sam3_model> g_model;
|
||||
static sam3_state_ptr g_state;
|
||||
static sam3_result g_result;
|
||||
static std::vector<std::vector<unsigned char>> g_mask_pngs;
|
||||
|
||||
// Output sink handed to stbi_write_png_to_func: every chunk the encoder emits
// is appended to the std::vector<unsigned char> passed as `context`.
static void png_write_callback(void *context, void *data, int size) {
    std::vector<unsigned char> &sink =
        *static_cast<std::vector<unsigned char> *>(context);
    const unsigned char *first = static_cast<const unsigned char *>(data);
    const unsigned char *last = first + size;
    sink.insert(sink.end(), first, last);
}
|
||||
|
||||
// Encode all masks as PNGs after segmentation
|
||||
static void encode_masks_as_png() {
|
||||
g_mask_pngs.clear();
|
||||
g_mask_pngs.resize(g_result.detections.size());
|
||||
|
||||
for (size_t i = 0; i < g_result.detections.size(); i++) {
|
||||
const auto &mask = g_result.detections[i].mask;
|
||||
if (mask.width > 0 && mask.height > 0 && !mask.data.empty()) {
|
||||
stbi_write_png_to_func(png_write_callback, &g_mask_pngs[i],
|
||||
mask.width, mask.height, 1,
|
||||
mask.data.data(), mask.width);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Load a SAM3 model and create the inference state used by later calls.
//
// model_path: path of the model file; must be a non-empty C string.
// threads:    forwarded to sam3_params::n_threads.
//
// Returns 0 on success, 1 if the model could not be loaded, 2 if the state
// could not be created, 3 if model_path is NULL or empty.
int sam3_cpp_load_model(const char *model_path, int threads) {
    // Reject a missing path up front: it would otherwise be dereferenced both
    // when filling the params and when formatting the log messages below.
    if (model_path == nullptr || model_path[0] == '\0') {
        fprintf(stderr, "[sam3-cpp] No model path given\n");
        return 3;
    }

    sam3_params params;
    params.model_path = model_path;
    params.n_threads = threads;
    params.use_gpu = true;

    g_model = sam3_load_model(params);
    if (!g_model) {
        fprintf(stderr, "[sam3-cpp] Failed to load model: %s\n", model_path);
        return 1;
    }

    g_state = sam3_create_state(*g_model, params);
    if (!g_state) {
        fprintf(stderr, "[sam3-cpp] Failed to create state\n");
        // Without a state the model is unusable; release it so later calls
        // report "model not loaded" instead of working on a half-initialised pair.
        g_model.reset();
        return 2;
    }

    fprintf(stderr, "[sam3-cpp] Model loaded: %s (threads=%d)\n", model_path, threads);
    return 0;
}
|
||||
|
||||
int sam3_cpp_encode_image(const char *image_path) {
|
||||
if (!g_model || !g_state) {
|
||||
fprintf(stderr, "[sam3-cpp] Model not loaded\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
sam3_image img = sam3_load_image(image_path);
|
||||
if (img.data.empty()) {
|
||||
fprintf(stderr, "[sam3-cpp] Failed to load image: %s\n", image_path);
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (!sam3_encode_image(*g_state, *g_model, img)) {
|
||||
fprintf(stderr, "[sam3-cpp] Failed to encode image\n");
|
||||
return 3;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int sam3_cpp_segment_pvs(float *points, int n_point_triples,
|
||||
float *boxes, int n_box_quads,
|
||||
float threshold) {
|
||||
if (!g_model || !g_state) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
sam3_pvs_params pvs_params;
|
||||
|
||||
// Parse points: each triple is [x, y, label]
|
||||
for (int i = 0; i < n_point_triples; i++) {
|
||||
float x = points[i * 3];
|
||||
float y = points[i * 3 + 1];
|
||||
float label = points[i * 3 + 2];
|
||||
sam3_point pt = {x, y};
|
||||
if (label > 0.5f) {
|
||||
pvs_params.pos_points.push_back(pt);
|
||||
} else {
|
||||
pvs_params.neg_points.push_back(pt);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse boxes: each quad is [x1, y1, x2, y2], use only first box
|
||||
if (n_box_quads > 0) {
|
||||
pvs_params.box = {boxes[0], boxes[1], boxes[2], boxes[3]};
|
||||
pvs_params.use_box = true;
|
||||
}
|
||||
|
||||
g_result = sam3_segment_pvs(*g_state, *g_model, pvs_params);
|
||||
encode_masks_as_png();
|
||||
|
||||
return static_cast<int>(g_result.detections.size());
|
||||
}
|
||||
|
||||
int sam3_cpp_segment_pcs(const char *text_prompt, float threshold) {
|
||||
if (!g_model || !g_state) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// PCS mode requires SAM 3 (full model with text encoder)
|
||||
if (sam3_is_visual_only(*g_model) ||
|
||||
sam3_get_model_type(*g_model) != SAM3_MODEL_SAM3) {
|
||||
fprintf(stderr, "[sam3-cpp] PCS mode requires full SAM 3 model\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
sam3_pcs_params pcs_params;
|
||||
pcs_params.text_prompt = text_prompt;
|
||||
pcs_params.score_threshold = threshold > 0 ? threshold : 0.5f;
|
||||
|
||||
g_result = sam3_segment_pcs(*g_state, *g_model, pcs_params);
|
||||
encode_masks_as_png();
|
||||
|
||||
return static_cast<int>(g_result.detections.size());
|
||||
}
|
||||
|
||||
int sam3_cpp_get_n_detections(void) {
|
||||
return static_cast<int>(g_result.detections.size());
|
||||
}
|
||||
|
||||
float sam3_cpp_get_detection_x(int i) {
|
||||
if (i < 0 || i >= static_cast<int>(g_result.detections.size())) return 0;
|
||||
return g_result.detections[i].box.x0;
|
||||
}
|
||||
|
||||
float sam3_cpp_get_detection_y(int i) {
|
||||
if (i < 0 || i >= static_cast<int>(g_result.detections.size())) return 0;
|
||||
return g_result.detections[i].box.y0;
|
||||
}
|
||||
|
||||
float sam3_cpp_get_detection_w(int i) {
|
||||
if (i < 0 || i >= static_cast<int>(g_result.detections.size())) return 0;
|
||||
const auto &box = g_result.detections[i].box;
|
||||
return box.x1 - box.x0;
|
||||
}
|
||||
|
||||
float sam3_cpp_get_detection_h(int i) {
|
||||
if (i < 0 || i >= static_cast<int>(g_result.detections.size())) return 0;
|
||||
const auto &box = g_result.detections[i].box;
|
||||
return box.y1 - box.y0;
|
||||
}
|
||||
|
||||
float sam3_cpp_get_detection_score(int i) {
|
||||
if (i < 0 || i >= static_cast<int>(g_result.detections.size())) return 0;
|
||||
return g_result.detections[i].score;
|
||||
}
|
||||
|
||||
int sam3_cpp_get_detection_mask_png(int i, unsigned char *buf, int buf_size) {
|
||||
if (i < 0 || i >= static_cast<int>(g_mask_pngs.size())) return 0;
|
||||
|
||||
const auto &png = g_mask_pngs[i];
|
||||
int size = static_cast<int>(png.size());
|
||||
|
||||
if (buf == nullptr) {
|
||||
return size;
|
||||
}
|
||||
|
||||
int to_copy = size < buf_size ? size : buf_size;
|
||||
memcpy(buf, png.data(), to_copy);
|
||||
return to_copy;
|
||||
}
|
||||
|
||||
// Discard the results of the last segmentation call: both the PNG-encoded
// masks and the detections they were produced from.
void sam3_cpp_free_results(void) {
    g_mask_pngs.clear();
    g_result.detections.clear();
}
|
||||
|
||||
} // extern "C"
|
||||
143
backend/go/sam3-cpp/gosam3.go
Normal file
143
backend/go/sam3-cpp/gosam3.go
Normal file
@@ -0,0 +1,143 @@
|
||||
package main
|
||||
|
||||
import (
	"encoding/base64"
	"fmt"
	"os"
	"path/filepath"
	"runtime"
	"unsafe"

	"github.com/mudler/LocalAI/pkg/grpc/base"
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
|
||||
|
||||
type SAM3 struct {
|
||||
base.SingleThread
|
||||
}
|
||||
|
||||
var (
|
||||
CppLoadModel func(modelPath string, threads int) int
|
||||
CppEncodeImage func(imagePath string) int
|
||||
CppSegmentPVS func(points uintptr, nPointTriples int, boxes uintptr, nBoxQuads int, threshold float32) int
|
||||
CppSegmentPCS func(textPrompt string, threshold float32) int
|
||||
CppGetNDetections func() int
|
||||
CppGetDetectionX func(i int) float32
|
||||
CppGetDetectionY func(i int) float32
|
||||
CppGetDetectionW func(i int) float32
|
||||
CppGetDetectionH func(i int) float32
|
||||
CppGetDetectionScore func(i int) float32
|
||||
CppGetDetectionMaskPNG func(i int, buf uintptr, bufSize int) int
|
||||
CppFreeResults func()
|
||||
)
|
||||
|
||||
func (s *SAM3) Load(opts *pb.ModelOptions) error {
|
||||
modelFile := opts.ModelFile
|
||||
if modelFile == "" {
|
||||
modelFile = opts.Model
|
||||
}
|
||||
|
||||
var modelPath string
|
||||
if filepath.IsAbs(modelFile) {
|
||||
modelPath = modelFile
|
||||
} else {
|
||||
modelPath = filepath.Join(opts.ModelPath, modelFile)
|
||||
}
|
||||
|
||||
threads := int(opts.Threads)
|
||||
if threads <= 0 {
|
||||
threads = 4
|
||||
}
|
||||
|
||||
ret := CppLoadModel(modelPath, threads)
|
||||
if ret != 0 {
|
||||
return fmt.Errorf("failed to load SAM3 model (error %d): %s", ret, modelPath)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SAM3) Detect(opts *pb.DetectOptions) (pb.DetectResponse, error) {
|
||||
// Decode base64 image and write to temp file
|
||||
imgData, err := base64.StdEncoding.DecodeString(opts.Src)
|
||||
if err != nil {
|
||||
return pb.DetectResponse{}, fmt.Errorf("failed to decode image: %w", err)
|
||||
}
|
||||
|
||||
tmpFile, err := os.CreateTemp("", "sam3-*.png")
|
||||
if err != nil {
|
||||
return pb.DetectResponse{}, fmt.Errorf("failed to create temp file: %w", err)
|
||||
}
|
||||
defer os.Remove(tmpFile.Name())
|
||||
|
||||
if _, err := tmpFile.Write(imgData); err != nil {
|
||||
tmpFile.Close()
|
||||
return pb.DetectResponse{}, fmt.Errorf("failed to write temp file: %w", err)
|
||||
}
|
||||
tmpFile.Close()
|
||||
|
||||
// Encode image
|
||||
ret := CppEncodeImage(tmpFile.Name())
|
||||
if ret != 0 {
|
||||
return pb.DetectResponse{}, fmt.Errorf("failed to encode image (error %d)", ret)
|
||||
}
|
||||
|
||||
threshold := opts.Threshold
|
||||
if threshold <= 0 {
|
||||
threshold = 0.5
|
||||
}
|
||||
|
||||
// Determine segmentation mode
|
||||
var nDetections int
|
||||
if opts.Prompt != "" {
|
||||
// Text-prompted segmentation (PCS mode, SAM 3 only)
|
||||
nDetections = CppSegmentPCS(opts.Prompt, threshold)
|
||||
} else {
|
||||
// Point/box-prompted segmentation (PVS mode)
|
||||
var pointsPtr uintptr
|
||||
var boxesPtr uintptr
|
||||
nPointTriples := len(opts.Points) / 3
|
||||
nBoxQuads := len(opts.Boxes) / 4
|
||||
|
||||
if nPointTriples > 0 {
|
||||
pointsPtr = uintptr(unsafe.Pointer(&opts.Points[0]))
|
||||
}
|
||||
if nBoxQuads > 0 {
|
||||
boxesPtr = uintptr(unsafe.Pointer(&opts.Boxes[0]))
|
||||
}
|
||||
|
||||
nDetections = CppSegmentPVS(pointsPtr, nPointTriples, boxesPtr, nBoxQuads, threshold)
|
||||
}
|
||||
|
||||
if nDetections < 0 {
|
||||
return pb.DetectResponse{}, fmt.Errorf("segmentation failed")
|
||||
}
|
||||
|
||||
defer CppFreeResults()
|
||||
|
||||
// Build response
|
||||
detections := make([]*pb.Detection, nDetections)
|
||||
for i := 0; i < nDetections; i++ {
|
||||
det := &pb.Detection{
|
||||
X: CppGetDetectionX(i),
|
||||
Y: CppGetDetectionY(i),
|
||||
Width: CppGetDetectionW(i),
|
||||
Height: CppGetDetectionH(i),
|
||||
Confidence: CppGetDetectionScore(i),
|
||||
ClassName: "segment",
|
||||
}
|
||||
|
||||
// Get mask PNG
|
||||
maskSize := CppGetDetectionMaskPNG(i, 0, 0)
|
||||
if maskSize > 0 {
|
||||
maskBuf := make([]byte, maskSize)
|
||||
CppGetDetectionMaskPNG(i, uintptr(unsafe.Pointer(&maskBuf[0])), maskSize)
|
||||
det.Mask = maskBuf
|
||||
}
|
||||
|
||||
detections[i] = det
|
||||
}
|
||||
|
||||
return pb.DetectResponse{
|
||||
Detections: detections,
|
||||
}, nil
|
||||
}
|
||||
51
backend/go/sam3-cpp/gosam3.h
Normal file
51
backend/go/sam3-cpp/gosam3.h
Normal file
@@ -0,0 +1,51 @@
|
||||
#ifndef GOSAM3_H
|
||||
#define GOSAM3_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Load model from file. Returns 0 on success, non-zero on failure.
|
||||
int sam3_cpp_load_model(const char *model_path, int threads);
|
||||
|
||||
// Encode an image from file path. Must be called before segmentation.
|
||||
// Returns 0 on success.
|
||||
int sam3_cpp_encode_image(const char *image_path);
|
||||
|
||||
// Segment with point/box prompts (PVS mode).
|
||||
// points: flat array of [x, y, label] triples (label: 1=positive, 0=negative)
|
||||
// boxes: flat array of [x1, y1, x2, y2] quads (only the first quad is used)
// threshold: accepted but currently not applied in PVS mode
|
||||
// Returns number of detections, or -1 on error.
|
||||
int sam3_cpp_segment_pvs(float *points, int n_point_triples,
|
||||
float *boxes, int n_box_quads,
|
||||
float threshold);
|
||||
|
||||
// Segment with text prompt (PCS mode, SAM 3 only).
|
||||
// Returns number of detections, or -1 on error.
|
||||
int sam3_cpp_segment_pcs(const char *text_prompt, float threshold);
|
||||
|
||||
// Access detection results (valid after a segment call).
|
||||
int sam3_cpp_get_n_detections(void);
|
||||
|
||||
// Get bounding box for detection i (as x, y, width, height).
|
||||
float sam3_cpp_get_detection_x(int i);
|
||||
float sam3_cpp_get_detection_y(int i);
|
||||
float sam3_cpp_get_detection_w(int i);
|
||||
float sam3_cpp_get_detection_h(int i);
|
||||
|
||||
// Get confidence score for detection i.
|
||||
float sam3_cpp_get_detection_score(int i);
|
||||
|
||||
// Get mask as PNG-encoded bytes.
|
||||
// If buf is NULL, returns the required buffer size.
|
||||
// Otherwise writes up to buf_size bytes and returns bytes written.
|
||||
int sam3_cpp_get_detection_mask_png(int i, unsigned char *buf, int buf_size);
|
||||
|
||||
// Free current detection results.
|
||||
void sam3_cpp_free_results(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // GOSAM3_H
|
||||
56
backend/go/sam3-cpp/main.go
Normal file
56
backend/go/sam3-cpp/main.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
type LibFuncs struct {
|
||||
FuncPtr any
|
||||
Name string
|
||||
}
|
||||
|
||||
func main() {
|
||||
// Get library name from environment variable, default to fallback
|
||||
libName := os.Getenv("SAM3_LIBRARY")
|
||||
if libName == "" {
|
||||
libName = "./libgosam3-fallback.so"
|
||||
}
|
||||
|
||||
gosamLib, err := purego.Dlopen(libName, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
libFuncs := []LibFuncs{
|
||||
{&CppLoadModel, "sam3_cpp_load_model"},
|
||||
{&CppEncodeImage, "sam3_cpp_encode_image"},
|
||||
{&CppSegmentPVS, "sam3_cpp_segment_pvs"},
|
||||
{&CppSegmentPCS, "sam3_cpp_segment_pcs"},
|
||||
{&CppGetNDetections, "sam3_cpp_get_n_detections"},
|
||||
{&CppGetDetectionX, "sam3_cpp_get_detection_x"},
|
||||
{&CppGetDetectionY, "sam3_cpp_get_detection_y"},
|
||||
{&CppGetDetectionW, "sam3_cpp_get_detection_w"},
|
||||
{&CppGetDetectionH, "sam3_cpp_get_detection_h"},
|
||||
{&CppGetDetectionScore, "sam3_cpp_get_detection_score"},
|
||||
{&CppGetDetectionMaskPNG, "sam3_cpp_get_detection_mask_png"},
|
||||
{&CppFreeResults, "sam3_cpp_free_results"},
|
||||
}
|
||||
|
||||
for _, lf := range libFuncs {
|
||||
purego.RegisterLibFunc(lf.FuncPtr, gosamLib, lf.Name)
|
||||
}
|
||||
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &SAM3{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
59
backend/go/sam3-cpp/package.sh
Executable file
59
backend/go/sam3-cpp/package.sh
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/bin/bash

# Assemble a self-contained package/ directory for the sam3-cpp backend:
# every libgosam3-*.so variant, the sam3-cpp binary and run.sh, plus (on
# Linux) the dynamic loader and core runtime libraries under package/lib.
# GPU libraries are added by scripts/build/package-gpu-libs.sh when present.
#
# All path expansions are quoted so the script also works when the checkout
# lives in a directory whose name contains spaces.

set -e

CURDIR=$(dirname "$(realpath "$0")")
REPO_ROOT="${CURDIR}/../../.."

# copy_system_libs LOADER LIBDIR
# Copy the dynamic loader LOADER to package/lib/ld.so and the core runtime
# libraries found in LIBDIR to package/lib, dereferencing symlinks (-L).
copy_system_libs() {
    local loader=$1 libdir=$2 lib
    cp -arfLv "$loader" "$CURDIR/package/lib/ld.so"
    for lib in libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 \
               libgomp.so.1 libdl.so.2 librt.so.1 libpthread.so.0; do
        cp -arfLv "$libdir/$lib" "$CURDIR/package/lib/$lib"
    done
}

# Create lib directory
mkdir -p "$CURDIR/package/lib"

# The glob stays outside the quotes so it still expands to every variant.
cp -avf "$CURDIR"/libgosam3-*.so "$CURDIR/package/"
cp -avf "$CURDIR/sam3-cpp" "$CURDIR/package/"
cp -fv "$CURDIR/run.sh" "$CURDIR/package/"

# Detect architecture (by which dynamic loader exists) and copy the matching
# system libraries. Darwin gets no bundled system libraries.
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    # x86_64 architecture
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    copy_system_libs /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    # ARM64 architecture
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    copy_system_libs /lib/ld-linux-aarch64.so.1 /lib/aarch64-linux-gnu
elif [ "$(uname -s)" = "Darwin" ]; then
    echo "Detected Darwin"
else
    echo "Error: Could not detect architecture"
    exit 1
fi

# Package GPU libraries based on BUILD_TYPE
GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
if [ -f "$GPU_LIB_SCRIPT" ]; then
    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
    # Sourcing defines package_gpu_libs; the target directory is passed as $1.
    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
    package_gpu_libs
fi

echo "Packaging completed successfully"
ls -liah "$CURDIR/package/"
ls -liah "$CURDIR/package/lib/"
|
||||
52
backend/go/sam3-cpp/run.sh
Executable file
52
backend/go/sam3-cpp/run.sh
Executable file
@@ -0,0 +1,52 @@
|
||||
#!/bin/bash
# Launcher for the sam3-cpp backend: picks the libgosam3 variant matching the
# host CPU's SIMD support, exports it as SAM3_LIBRARY, and execs the backend
# binary (through the bundled lib/ld.so when one was packaged).
set -ex

# Get the absolute current dir where the script is located
CURDIR=$(dirname "$(realpath "$0")")

cd /

echo "CPU info:"
if [ "$(uname)" != "Darwin" ]; then
    grep -e "model\sname" /proc/cpuinfo | head -1
    grep -e "flags" /proc/cpuinfo | head -1
fi

LIBRARY="$CURDIR/libgosam3-fallback.so"

# Later checks override earlier ones, so the most capable variant that is
# both supported by the CPU and present on disk wins.
if [ "$(uname)" != "Darwin" ]; then
    if grep -q -e "\savx\s" /proc/cpuinfo ; then
        echo "CPU: AVX found OK"
        if [ -e "$CURDIR/libgosam3-avx.so" ]; then
            LIBRARY="$CURDIR/libgosam3-avx.so"
        fi
    fi

    if grep -q -e "\savx2\s" /proc/cpuinfo ; then
        echo "CPU: AVX2 found OK"
        if [ -e "$CURDIR/libgosam3-avx2.so" ]; then
            LIBRARY="$CURDIR/libgosam3-avx2.so"
        fi
    fi

    # Check avx 512
    if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
        echo "CPU: AVX512F found OK"
        if [ -e "$CURDIR/libgosam3-avx512.so" ]; then
            LIBRARY="$CURDIR/libgosam3-avx512.so"
        fi
    fi
fi

# Append the inherited LD_LIBRARY_PATH only when it is non-empty: a trailing
# ':' would add an empty entry, which the loader resolves to the current
# directory ('/' after the cd above).
export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export SAM3_LIBRARY="$LIBRARY"

# If there is a lib/ld.so, use it
if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    echo "Using library: $LIBRARY"
    exec "$CURDIR/lib/ld.so" "$CURDIR/sam3-cpp" "$@"
fi

echo "Using library: $LIBRARY"
exec "$CURDIR/sam3-cpp" "$@"
|
||||
50
backend/go/sam3-cpp/test.sh
Executable file
50
backend/go/sam3-cpp/test.sh
Executable file
@@ -0,0 +1,50 @@
|
||||
#!/bin/bash
# Prepare fixtures for the sam3-cpp backend tests: download the test model
# (if missing) and generate a small solid-red PNG test image.
set -e

CURDIR=$(dirname "$(realpath "$0")")

echo "Running sam3-cpp backend tests..."

# The test requires a SAM model in GGML format.
# Uses EdgeTAM Q4_0 (~15MB) for fast CI testing.
SAM3_MODEL_DIR="${SAM3_MODEL_DIR:-$CURDIR/test-models}"
SAM3_MODEL_FILE="${SAM3_MODEL_FILE:-edgetam_q4_0.ggml}"
SAM3_MODEL_URL="${SAM3_MODEL_URL:-https://huggingface.co/PABannier/sam3.cpp/resolve/main/edgetam_q4_0.ggml}"

# Download model if not present
if [ ! -f "$SAM3_MODEL_DIR/$SAM3_MODEL_FILE" ]; then
    echo "Downloading EdgeTAM Q4_0 model for testing..."
    mkdir -p "$SAM3_MODEL_DIR"
    # Download to a temporary name and fail on HTTP errors, so an error page
    # or an interrupted transfer is never left behind as the "model" and then
    # silently reused by the existence check above on the next run.
    SAM3_MODEL_TMP="$SAM3_MODEL_DIR/$SAM3_MODEL_FILE.part"
    if ! curl --fail -L -o "$SAM3_MODEL_TMP" "$SAM3_MODEL_URL" --progress-bar; then
        rm -f "$SAM3_MODEL_TMP"
        echo "Error: failed to download $SAM3_MODEL_URL" >&2
        exit 1
    fi
    mv "$SAM3_MODEL_TMP" "$SAM3_MODEL_DIR/$SAM3_MODEL_FILE"
    echo "Model downloaded."
fi

# Create a test image (64x64 solid red PNG).
# This is a minimal valid PNG for testing the pipeline.
TEST_IMAGE_DIR="$CURDIR/test-data"
mkdir -p "$TEST_IMAGE_DIR"

# Generate the test image with Python when available; without python3 no
# image is created and the step is skipped.
if command -v python3 &> /dev/null; then
    # The output path is passed as an argument rather than spliced into the
    # Python source, so quotes or backslashes in the path cannot break it.
    python3 - "$TEST_IMAGE_DIR/test.png" <<'PY'
import struct
import sys
import zlib


def create_png(width, height, r, g, b):
    """Return the bytes of an 8-bit RGB PNG filled with the colour (r, g, b)."""
    raw = b''
    for y in range(height):
        raw += b'\x00'  # filter byte
        for x in range(width):
            raw += bytes([r, g, b])

    def chunk(ctype, data):
        c = ctype + data
        return struct.pack('>I', len(data)) + c + struct.pack('>I', zlib.crc32(c) & 0xffffffff)

    ihdr = struct.pack('>IIBBBBB', width, height, 8, 2, 0, 0, 0)
    return b'\x89PNG\r\n\x1a\n' + chunk(b'IHDR', ihdr) + chunk(b'IDAT', zlib.compress(raw)) + chunk(b'IEND', b'')


with open(sys.argv[1], 'wb') as f:
    f.write(create_png(64, 64, 255, 0, 0))
PY
    echo "Test image created."
fi

echo "sam3-cpp test setup complete."
echo "Model: $SAM3_MODEL_DIR/$SAM3_MODEL_FILE"
echo "Note: Full integration tests run via the LocalAI test-extra target."
|
||||
@@ -125,6 +125,31 @@
|
||||
nvidia-cuda-13: "cuda13-rfdetr"
|
||||
nvidia-cuda-12: "cuda12-rfdetr"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-rfdetr"
|
||||
- &sam3cpp
|
||||
name: "sam3-cpp"
|
||||
alias: "sam3-cpp"
|
||||
license: mit
|
||||
description: |
|
||||
Segment Anything Model (SAM 3/2/EdgeTAM) in C/C++ using GGML.
|
||||
Supports text-prompted and point/box-prompted image segmentation.
|
||||
urls:
|
||||
- https://github.com/PABannier/sam3.cpp
|
||||
tags:
|
||||
- image-segmentation
|
||||
- object-detection
|
||||
- sam3
|
||||
- gpu
|
||||
- cpu
|
||||
capabilities:
|
||||
default: "cpu-sam3-cpp"
|
||||
nvidia: "cuda12-sam3-cpp"
|
||||
nvidia-cuda-12: "cuda12-sam3-cpp"
|
||||
nvidia-cuda-13: "cuda13-sam3-cpp"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-sam3-cpp"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-sam3-cpp"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-sam3-cpp"
|
||||
intel: "intel-sycl-f32-sam3-cpp"
|
||||
vulkan: "vulkan-sam3-cpp"
|
||||
- &vllm
|
||||
name: "vllm"
|
||||
license: apache-2.0
|
||||
@@ -1628,6 +1653,89 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-rfdetr"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-metal-darwin-arm64-rfdetr
|
||||
## sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "sam3-cpp-development"
|
||||
capabilities:
|
||||
default: "cpu-sam3-cpp-development"
|
||||
nvidia: "cuda12-sam3-cpp-development"
|
||||
nvidia-cuda-12: "cuda12-sam3-cpp-development"
|
||||
nvidia-cuda-13: "cuda13-sam3-cpp-development"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-sam3-cpp-development"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-sam3-cpp-development"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-sam3-cpp-development"
|
||||
intel: "intel-sycl-f32-sam3-cpp-development"
|
||||
vulkan: "vulkan-sam3-cpp-development"
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "cpu-sam3-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-cpu-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "cpu-sam3-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-cpu-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "cuda12-sam3-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-12-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "cuda12-sam3-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-12-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "cuda13-sam3-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "cuda13-sam3-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "nvidia-l4t-arm64-sam3-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-arm64-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "nvidia-l4t-arm64-sam3-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-arm64-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "cuda13-nvidia-l4t-arm64-sam3-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "cuda13-nvidia-l4t-arm64-sam3-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "intel-sycl-f32-sam3-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-intel-sycl-f32-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "intel-sycl-f32-sam3-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-intel-sycl-f32-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "vulkan-sam3-cpp"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-vulkan-sam3-cpp
|
||||
- !!merge <<: *sam3cpp
|
||||
name: "vulkan-sam3-cpp-development"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-sam3-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-vulkan-sam3-cpp
|
||||
## Rerankers
|
||||
- !!merge <<: *rerankers
|
||||
name: "rerankers-development"
|
||||
|
||||
@@ -13,6 +13,10 @@ import (
|
||||
|
||||
func Detection(
|
||||
sourceFile string,
|
||||
prompt string,
|
||||
points []float32,
|
||||
boxes []float32,
|
||||
threshold float32,
|
||||
loader *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
modelConfig config.ModelConfig,
|
||||
@@ -35,7 +39,11 @@ func Detection(
|
||||
}
|
||||
|
||||
res, err := detectionModel.Detect(context.Background(), &proto.DetectOptions{
|
||||
Src: sourceFile,
|
||||
Src: sourceFile,
|
||||
Prompt: prompt,
|
||||
Points: points,
|
||||
Boxes: boxes,
|
||||
Threshold: threshold,
|
||||
})
|
||||
|
||||
if appConfig.EnableTracing {
|
||||
|
||||
@@ -705,7 +705,8 @@ func (c *ModelConfig) GuessUsecases(u ModelConfigUsecase) bool {
|
||||
}
|
||||
|
||||
if (u & FLAG_DETECTION) == FLAG_DETECTION {
|
||||
if c.Backend != "rfdetr" {
|
||||
detectionBackends := []string{"rfdetr", "sam3-cpp"}
|
||||
if !slices.Contains(detectionBackends, c.Backend) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
@@ -37,7 +39,7 @@ func DetectionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appC
|
||||
return err
|
||||
}
|
||||
|
||||
res, err := backend.Detection(image, ml, appConfig, *cfg)
|
||||
res, err := backend.Detection(image, input.Prompt, input.Points, input.Boxes, input.Threshold, ml, appConfig, *cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -46,12 +48,18 @@ func DetectionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appC
|
||||
Detections: make([]schema.Detection, len(res.Detections)),
|
||||
}
|
||||
for i, detection := range res.Detections {
|
||||
var mask string
|
||||
if len(detection.Mask) > 0 {
|
||||
mask = base64.StdEncoding.EncodeToString(detection.Mask)
|
||||
}
|
||||
response.Detections[i] = schema.Detection{
|
||||
X: detection.X,
|
||||
Y: detection.Y,
|
||||
Width: detection.Width,
|
||||
Height: detection.Height,
|
||||
ClassName: detection.ClassName,
|
||||
X: detection.X,
|
||||
Y: detection.Y,
|
||||
Width: detection.Width,
|
||||
Height: detection.Height,
|
||||
ClassName: detection.ClassName,
|
||||
Confidence: detection.Confidence,
|
||||
Mask: mask,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -152,7 +152,11 @@ type SystemInformationResponse struct {
|
||||
|
||||
type DetectionRequest struct {
|
||||
BasicModelRequest
|
||||
Image string `json:"image"` // URL or base64-encoded image to analyze
|
||||
Image string `json:"image"` // URL or base64-encoded image to analyze
|
||||
Prompt string `json:"prompt,omitempty"` // Text prompt (for SAM 3 PCS mode)
|
||||
Points []float32 `json:"points,omitempty"` // Point coordinates as [x,y,label,...] triples (label: 1=pos, 0=neg)
|
||||
Boxes []float32 `json:"boxes,omitempty"` // Box coordinates as [x1,y1,x2,y2,...] quads
|
||||
Threshold float32 `json:"threshold,omitempty"` // Detection confidence threshold
|
||||
}
|
||||
|
||||
type DetectionResponse struct {
|
||||
@@ -160,11 +164,13 @@ type DetectionResponse struct {
|
||||
}
|
||||
|
||||
type Detection struct {
|
||||
X float32 `json:"x"`
|
||||
Y float32 `json:"y"`
|
||||
Width float32 `json:"width"`
|
||||
Height float32 `json:"height"`
|
||||
ClassName string `json:"class_name"`
|
||||
X float32 `json:"x"`
|
||||
Y float32 `json:"y"`
|
||||
Width float32 `json:"width"`
|
||||
Height float32 `json:"height"`
|
||||
ClassName string `json:"class_name"`
|
||||
Confidence float32 `json:"confidence,omitempty"`
|
||||
Mask string `json:"mask,omitempty"` // base64-encoded PNG segmentation mask
|
||||
}
|
||||
|
||||
type ImportModelRequest struct {
|
||||
|
||||
@@ -5,7 +5,7 @@ weight = 13
|
||||
url = "/features/object-detection/"
|
||||
+++
|
||||
|
||||
LocalAI supports object detection through various backends. This feature allows you to identify and locate objects within images with high accuracy and real-time performance. Currently, [RF-DETR](https://github.com/roboflow/rf-detr) is available as an implementation.
|
||||
LocalAI supports object detection and image segmentation through various backends. This feature allows you to identify and locate objects within images with high accuracy and real-time performance. Available backends include [RF-DETR](https://github.com/roboflow/rf-detr) for object detection and [sam3.cpp](https://github.com/PABannier/sam3.cpp) for image segmentation (SAM 3/2/EdgeTAM).
|
||||
|
||||
## Overview
|
||||
|
||||
@@ -14,6 +14,8 @@ Object detection in LocalAI is implemented through dedicated backends that can i
|
||||
**Key Features:**
|
||||
- Real-time object detection
|
||||
- High accuracy detection with bounding boxes
|
||||
- Image segmentation with binary masks (SAM backends)
|
||||
- Text-prompted, point-prompted, and box-prompted segmentation
|
||||
- Support for multiple hardware accelerators (CPU, NVIDIA GPU, Intel GPU, AMD GPU)
|
||||
- Structured detection results with confidence scores
|
||||
- Easy integration through the `/v1/detection` endpoint
|
||||
@@ -45,6 +47,10 @@ The request body should contain:
|
||||
- `image`: The image to analyze, which can be:
|
||||
- A URL to an image
|
||||
- A base64-encoded image
|
||||
- `prompt` (optional): Text prompt for text-prompted segmentation (SAM 3 only)
|
||||
- `points` (optional): Point coordinates as `[x, y, label, ...]` triples (label: 1=positive, 0=negative)
|
||||
- `boxes` (optional): Box coordinates as `[x1, y1, x2, y2, ...]` quads
|
||||
- `threshold` (optional): Detection confidence threshold (default: 0.5)
|
||||
|
||||
### Response Format
|
||||
|
||||
@@ -78,6 +84,7 @@ Each detection includes:
|
||||
- `width`, `height`: Dimensions of the bounding box
|
||||
- `confidence`: Detection confidence score (0.0 to 1.0)
|
||||
- `class_name`: The detected object class
|
||||
- `mask` (optional): Base64-encoded PNG binary segmentation mask (SAM backends only)
|
||||
|
||||
## Backends
|
||||
|
||||
@@ -123,6 +130,76 @@ Currently, the following model is available in the [Model Gallery]({{%relref "fe
|
||||
|
||||
You can browse and install this model through the LocalAI web interface or using the command line.
|
||||
|
||||
### SAM3 Backend (sam3-cpp)
|
||||
|
||||
The sam3-cpp backend provides image segmentation using [sam3.cpp](https://github.com/PABannier/sam3.cpp), a portable C++ implementation of Meta's Segment Anything Model. It supports multiple model architectures:
|
||||
|
||||
- **SAM 3**: Full model with text encoder for text-prompted detection and segmentation
|
||||
- **SAM 2 / SAM 2.1**: Hiera backbone models in multiple sizes
|
||||
- **SAM 3 Visual-Only**: Point/box segmentation without text encoder
|
||||
- **EdgeTAM**: Ultra-efficient mobile variant (~15MB quantized)
|
||||
|
||||
#### Setup
|
||||
|
||||
1. **Manual Configuration**
|
||||
|
||||
Create a model configuration file in your `models` directory:
|
||||
|
||||
```yaml
|
||||
name: sam3
|
||||
backend: sam3-cpp
|
||||
parameters:
|
||||
model: edgetam_q4_0.ggml
|
||||
threads: 4
|
||||
known_usecases:
|
||||
- detection
|
||||
```
|
||||
|
||||
Download the model from [Hugging Face](https://huggingface.co/PABannier/sam3.cpp).
|
||||
|
||||
#### Segmentation Modes
|
||||
|
||||
**Point-prompted segmentation** (all models):
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:8080/v1/detection \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "sam3",
|
||||
"image": "data:image/jpeg;base64,...",
|
||||
"points": [256.0, 256.0, 1.0],
|
||||
"threshold": 0.5
|
||||
}'
|
||||
```
|
||||
|
||||
**Box-prompted segmentation** (all models):
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:8080/v1/detection \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "sam3",
|
||||
"image": "data:image/jpeg;base64,...",
|
||||
"boxes": [100.0, 100.0, 400.0, 400.0],
|
||||
"threshold": 0.5
|
||||
}'
|
||||
```
|
||||
|
||||
**Text-prompted segmentation** (SAM 3 full model only):
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:8080/v1/detection \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "sam3",
|
||||
"image": "data:image/jpeg;base64,...",
|
||||
"prompt": "cat",
|
||||
"threshold": 0.5
|
||||
}'
|
||||
```
|
||||
|
||||
The response includes segmentation masks as base64-encoded PNGs in the `mask` field of each detection.
|
||||
|
||||
## Examples
|
||||
|
||||
### Basic Object Detection
|
||||
@@ -180,6 +257,7 @@ local-ai run --debug rfdetr-base
|
||||
LocalAI includes a dedicated **object-detection** category for models and backends that specialize in identifying and locating objects within images. This category currently includes:
|
||||
|
||||
- **RF-DETR**: Real-time transformer-based object detection
|
||||
- **sam3-cpp**: SAM 3/2/EdgeTAM image segmentation
|
||||
|
||||
Additional object detection models and backends will be added to this category in the future. You can filter models by the `object-detection` tag in the model gallery to find all available object detection models.
|
||||
|
||||
|
||||
@@ -3134,6 +3134,37 @@
|
||||
model: rfdetr-base
|
||||
known_usecases:
|
||||
- detection
|
||||
- &sam3cpp
|
||||
name: "edgetam"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
size: "16MB"
|
||||
license: apache-2.0
|
||||
description: |
|
||||
EdgeTAM is an ultra-efficient variant of the Segment Anything Model (SAM) for image segmentation.
|
||||
It uses a RepViT backbone and is only ~16MB quantized (Q4_0), making it ideal for edge deployment.
|
||||
Supports point-prompted and box-prompted image segmentation via the /v1/detection endpoint.
|
||||
Powered by sam3.cpp (C/C++ with GGML).
|
||||
tags:
|
||||
- image-segmentation
|
||||
- object-detection
|
||||
- sam3
|
||||
- edgetam
|
||||
- cpu
|
||||
- gpu
|
||||
urls:
|
||||
- https://github.com/PABannier/sam3.cpp
|
||||
- https://huggingface.co/PABannier/sam3.cpp
|
||||
overrides:
|
||||
backend: sam3-cpp
|
||||
parameters:
|
||||
model: edgetam_q4_0.ggml
|
||||
threads: 4
|
||||
known_usecases:
|
||||
- detection
|
||||
files:
|
||||
- filename: edgetam_q4_0.ggml
|
||||
sha256: a8a35e35fb9a1b6f099c3f35e3024548b0fc979c2a4184642562804192496e09
|
||||
uri: huggingface://PABannier/sam3.cpp/edgetam_q4_0.ggml
|
||||
- name: "dream-org_dream-v0-instruct-7b"
|
||||
# chatml
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
|
||||
Reference in New Issue
Block a user