feat(sam.cpp): add sam.cpp detection backend (#9288)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-06-08 16:57:08 -04:00 · 2026-04-09 21:49:11 +02:00
parent 13a6ed709c
commit 706cf5d43c
21 changed files with 1134 additions and 17 deletions
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -574,6 +574,19 @@ jobs:
            dockerfile: "./backend/Dockerfile.golang"
            context: "./"
            ubuntu-version: '2404'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "8"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-12-sam3-cpp'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "sam3-cpp"
+            dockerfile: "./backend/Dockerfile.golang"
+            context: "./"
+            ubuntu-version: '2404'
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "8"
@@ -1147,6 +1160,32 @@ jobs:
            backend: "stablediffusion-ggml"
            dockerfile: "./backend/Dockerfile.golang"
            context: "./"
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-sam3-cpp'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "sam3-cpp"
+            dockerfile: "./backend/Dockerfile.golang"
+            context: "./"
+            ubuntu-version: '2404'
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/arm64'
+            skip-drivers: 'false'
+            tag-latest: 'auto'
+            tag-suffix: '-nvidia-l4t-cuda-13-arm64-sam3-cpp'
+            base-image: "ubuntu:24.04"
+            ubuntu-version: '2404'
+            runs-on: 'ubuntu-24.04-arm'
+            backend: "sam3-cpp"
+            dockerfile: "./backend/Dockerfile.golang"
+            context: "./"
          - build-type: 'cublas'
            cuda-major-version: "13"
            cuda-minor-version: "0"
@@ -1907,6 +1946,59 @@ jobs:
            dockerfile: "./backend/Dockerfile.golang"
            context: "./"
            ubuntu-version: '2404'
+          # sam3-cpp
+          - build-type: ''
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-cpu-sam3-cpp'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "sam3-cpp"
+            dockerfile: "./backend/Dockerfile.golang"
+            context: "./"
+            ubuntu-version: '2404'
+          - build-type: 'sycl_f32'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-intel-sycl-f32-sam3-cpp'
+            runs-on: 'ubuntu-latest'
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
+            skip-drivers: 'false'
+            backend: "sam3-cpp"
+            dockerfile: "./backend/Dockerfile.golang"
+            context: "./"
+            ubuntu-version: '2404'
+          - build-type: 'sycl_f16'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-intel-sycl-f16-sam3-cpp'
+            runs-on: 'ubuntu-latest'
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
+            skip-drivers: 'false'
+            backend: "sam3-cpp"
+            dockerfile: "./backend/Dockerfile.golang"
+            context: "./"
+            ubuntu-version: '2404'
+          - build-type: 'vulkan'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64,linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-vulkan-sam3-cpp'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "sam3-cpp"
+            dockerfile: "./backend/Dockerfile.golang"
+            context: "./"
+            ubuntu-version: '2404'
          - build-type: 'sycl_f32'
            cuda-major-version: ""
            cuda-minor-version: ""
@@ -1959,6 +2051,19 @@ jobs:
            dockerfile: "./backend/Dockerfile.golang"
            context: "./"
            ubuntu-version: '2204'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/arm64'
+            skip-drivers: 'false'
+            tag-latest: 'auto'
+            tag-suffix: '-nvidia-l4t-arm64-sam3-cpp'
+            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            runs-on: 'ubuntu-24.04-arm'
+            backend: "sam3-cpp"
+            dockerfile: "./backend/Dockerfile.golang"
+            context: "./"
+            ubuntu-version: '2204'
          # whisper
          - build-type: ''
            cuda-major-version: ""
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -34,6 +34,10 @@ jobs:
            variable: "ACESTEP_CPP_VERSION"
            branch: "master"
            file: "backend/go/acestep-cpp/Makefile"
+          - repository: "PABannier/sam3.cpp"
+            variable: "SAM3_VERSION"
+            branch: "main"
+            file: "backend/go/sam3-cpp/Makefile"
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 # Disable parallel execution for backend builds
-.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros
+.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/outetts backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/mlx-distributed backends/stablediffusion-ggml-darwin backends/vllm backends/vllm-omni backends/moonshine backends/pocket-tts backends/qwen-tts backends/faster-qwen3-tts backends/qwen-asr backends/nemo backends/voxcpm backends/whisperx backends/ace-step backends/acestep-cpp backends/fish-speech backends/voxtral backends/opus backends/trl backends/llama-cpp-quantization backends/kokoros backends/sam3-cpp

 GOCMD=go
 GOTEST=$(GOCMD) test
@@ -593,6 +593,9 @@ BACKEND_LLAMA_CPP_QUANTIZATION = llama-cpp-quantization|python|.|false|true
 # Rust backends
 BACKEND_KOKOROS = kokoros|rust|.|false|true

+# C++ backends (Go wrapper with purego)
+BACKEND_SAM3_CPP = sam3-cpp|golang|.|false|true
+
 # Helper function to build docker image for a backend
 # Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG)
 define docker-build-backend
@@ -652,12 +655,13 @@ $(eval $(call generate-docker-build-target,$(BACKEND_MLX_DISTRIBUTED)))
 $(eval $(call generate-docker-build-target,$(BACKEND_TRL)))
 $(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP_QUANTIZATION)))
 $(eval $(call generate-docker-build-target,$(BACKEND_KOKOROS)))
+$(eval $(call generate-docker-build-target,$(BACKEND_SAM3_CPP)))

 # Pattern rule for docker-save targets
 docker-save-%: backend-images
 	docker save local-ai-backend:$* -o backend-images/$*.tar

-docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-kokoros
+docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-vllm-omni docker-build-transformers docker-build-outetts docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-chatterbox docker-build-vibevoice docker-build-moonshine docker-build-pocket-tts docker-build-qwen-tts docker-build-fish-speech docker-build-faster-qwen3-tts docker-build-qwen-asr docker-build-nemo docker-build-voxcpm docker-build-whisperx docker-build-ace-step docker-build-acestep-cpp docker-build-voxtral docker-build-mlx-distributed docker-build-trl docker-build-llama-cpp-quantization docker-build-kokoros docker-build-sam3-cpp

 ########################################################
 ### Mock Backend for E2E Tests
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -444,6 +444,10 @@ message Message {

 message DetectOptions {
   string src = 1;
+  string prompt = 2;           // Text prompt (for SAM 3 PCS mode); the sam3-cpp backend falls back to point/box mode when empty
+  repeated float points = 3;   // Point coordinates as [x1, y1, label1, x2, y2, label2, ...] (label: 1=pos, 0=neg)
+  repeated float boxes = 4;    // Box coordinates as [x1, y1, x2, y2, ...]; the sam3-cpp backend only uses the first box
+  float threshold = 5;         // Detection confidence threshold; the sam3-cpp backend replaces values <= 0 with 0.5
 }

 message Detection {
@@ -453,6 +457,7 @@ message Detection {
  float height = 4;
  float confidence = 5;
  string class_name = 6;
+  bytes mask = 7;              // PNG-encoded binary segmentation mask
 }

 message DetectResponse {
--- a/backend/go/sam3-cpp/.gitignore
+++ b/backend/go/sam3-cpp/.gitignore
@@ -0,0 +1,7 @@
+sources/
+build*/
+package/
+libgosam3*.so
+sam3-cpp
+test-models/
+test-data/
--- a/backend/go/sam3-cpp/CMakeLists.txt
+++ b/backend/go/sam3-cpp/CMakeLists.txt
@@ -0,0 +1,26 @@
+cmake_minimum_required(VERSION 3.14)
+project(gosam3 LANGUAGES C CXX)
+
+# Compile every target as position-independent code; the static libraries
+# below end up inside a loadable module.
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+
+# Link ggml statically so the module has no runtime .so dependency on it
+set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build static libraries" FORCE)
+
+# Only the library itself is needed from the upstream sam3.cpp tree
+set(SAM3_BUILD_TESTS OFF CACHE BOOL "Disable sam3.cpp tests" FORCE)
+set(SAM3_BUILD_EXAMPLES OFF CACHE BOOL "Disable sam3.cpp examples" FORCE)
+
+add_subdirectory(sources/sam3.cpp)
+
+# The C wrapper is built as a module that the Go binary opens at runtime
+add_library(gosam3 MODULE gosam3.cpp)
+
+set_target_properties(gosam3 PROPERTIES
+    CXX_STANDARD 14
+    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
+)
+
+target_include_directories(gosam3 PUBLIC
+    sources/sam3.cpp
+    sources/sam3.cpp/ggml/include
+)
+
+target_link_libraries(gosam3 PRIVATE sam3 ggml)
+
+# GCC releases before 9 keep std::filesystem in a separate library
+if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
+    target_link_libraries(gosam3 PRIVATE stdc++fs)
+endif()
--- a/backend/go/sam3-cpp/Makefile
+++ b/backend/go/sam3-cpp/Makefile
@@ -0,0 +1,122 @@
+CMAKE_ARGS?=
+BUILD_TYPE?=
+NATIVE?=false
+
+GOCMD?=go
+GO_TAGS?=
+JOBS?=$(shell nproc --ignore=1)
+
+# sam3.cpp
+SAM3_REPO?=https://github.com/PABannier/sam3.cpp
+SAM3_VERSION?=8cc6e62bc740d7972746fcd47465ddf1c2b1e3c3
+
+# Translate NATIVE / BUILD_TYPE into ggml CMake options, appended to CMAKE_ARGS.
+
+# NATIVE=false (the default above) turns off ggml's host-specific tuning
+ifeq ($(NATIVE),false)
+	CMAKE_ARGS+=-DGGML_NATIVE=OFF
+endif
+
+# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
+ifeq ($(BUILD_TYPE),cublas)
+	CMAKE_ARGS+=-DGGML_CUDA=ON
+else ifeq ($(BUILD_TYPE),openblas)
+	CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
+else ifeq ($(BUILD_TYPE),clblas)
+	CMAKE_ARGS+=-DGGML_CLBLAST=ON
+else ifeq ($(BUILD_TYPE),hipblas)
+# ROCm builds compile with the clang shipped under ROCM_HOME
+	ROCM_HOME ?= /opt/rocm
+	ROCM_PATH ?= /opt/rocm
+	export CXX=$(ROCM_HOME)/llvm/bin/clang++
+	export CC=$(ROCM_HOME)/llvm/bin/clang
+	AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201
+	CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
+else ifeq ($(BUILD_TYPE),vulkan)
+	CMAKE_ARGS+=-DGGML_VULKAN=ON
+# NOTE(review): OS is never assigned in this Makefile (only UNAME_S, further
+# down), so this branch relies on OS arriving from the environment or a parent
+# make — confirm that it is exported as "Darwin" on macOS builds.
+else ifeq ($(OS),Darwin)
+	ifneq ($(BUILD_TYPE),metal)
+		CMAKE_ARGS+=-DGGML_METAL=OFF
+	else
+		CMAKE_ARGS+=-DGGML_METAL=ON
+		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
+	endif
+endif
+
+# SYCL builds switch to the icx/icpx compilers; f16 additionally sets GGML_SYCL_F16
+ifeq ($(BUILD_TYPE),sycl_f16)
+	CMAKE_ARGS+=-DGGML_SYCL=ON \
+		-DCMAKE_C_COMPILER=icx \
+		-DCMAKE_CXX_COMPILER=icpx \
+		-DGGML_SYCL_F16=ON
+endif
+
+ifeq ($(BUILD_TYPE),sycl_f32)
+	CMAKE_ARGS+=-DGGML_SYCL=ON \
+		-DCMAKE_C_COMPILER=icx \
+		-DCMAKE_CXX_COMPILER=icpx
+endif
+
+# Fetch the sam3.cpp sources: clone SAM3_REPO, check out the pinned
+# SAM3_VERSION, then bring the submodules in line with that revision.
+# The target is the checkout directory itself, so it only runs while
+# sources/sam3.cpp does not exist.
+sources/sam3.cpp:
+	git clone --recursive $(SAM3_REPO) sources/sam3.cpp && \
+	cd sources/sam3.cpp && \
+	git checkout $(SAM3_VERSION) && \
+	git submodule update --init --recursive --depth 1 --single-branch
+
+# Detect OS
+UNAME_S := $(shell uname -s)
+
+# Only build CPU variants on Linux
+ifeq ($(UNAME_S),Linux)
+	VARIANT_TARGETS = libgosam3-avx.so libgosam3-avx2.so libgosam3-avx512.so libgosam3-fallback.so
+else
+	# On non-Linux (e.g., Darwin), build only fallback variant
+	VARIANT_TARGETS = libgosam3-fallback.so
+endif
+
+# The Go binary is built without cgo: main.go opens the shared library at
+# runtime through purego, so the variant .so files are prerequisites of the
+# target but are not linked into the binary.
+sam3-cpp: main.go gosam3.go $(VARIANT_TARGETS)
+	CGO_ENABLED=0 $(GOCMD) build -tags "$(GO_TAGS)" -o sam3-cpp ./
+
+# These targets name actions, not files. `package` in particular collides with
+# the package/ directory that package.sh creates; without .PHONY make may
+# treat that directory as an up-to-date target and skip repackaging.
+.PHONY: package build clean purge all
+
+# Assemble the distributable package/ directory
+package: sam3-cpp
+	bash package.sh
+
+build: package
+
+# Remove every build product, including the cloned sources
+clean: purge
+	rm -rf libgosam3*.so sam3-cpp package sources
+
+# Remove only the CMake build directories
+purge:
+	rm -rf build*
+
+# Build all variants (Linux only)
+# Every variant rule follows the same sequence: purge old build directories,
+# re-invoke make on libgosam3-custom with SO_TARGET set to the output name and
+# the ggml instruction-set switches appended to CMAKE_ARGS, then delete the
+# build directories again.
+# NOTE(review): each rule removes build* wholesale, so two variants must not be
+# built concurrently in this directory — confirm callers do not pass -j here.
+# NOTE(review): GREEN and RESET are not defined in this Makefile; the $(info)
+# lines presumably rely on them being inherited or expanding to nothing.
+ifeq ($(UNAME_S),Linux)
+libgosam3-avx.so: sources/sam3.cpp
+	$(MAKE) purge
+	$(info ${GREEN}I sam3-cpp build info:avx${RESET})
+	SO_TARGET=libgosam3-avx.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
+	rm -rfv build*
+
+libgosam3-avx2.so: sources/sam3.cpp
+	$(MAKE) purge
+	$(info ${GREEN}I sam3-cpp build info:avx2${RESET})
+	SO_TARGET=libgosam3-avx2.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libgosam3-custom
+	rm -rfv build*
+
+libgosam3-avx512.so: sources/sam3.cpp
+	$(MAKE) purge
+	$(info ${GREEN}I sam3-cpp build info:avx512${RESET})
+	SO_TARGET=libgosam3-avx512.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on -DGGML_BMI2=on" $(MAKE) libgosam3-custom
+	rm -rfv build*
+endif
+
+# Build fallback variant (all platforms)
+# All of the SIMD switches listed above are turned off for this one.
+libgosam3-fallback.so: sources/sam3.cpp
+	$(MAKE) purge
+	$(info ${GREEN}I sam3-cpp build info:fallback${RESET})
+	SO_TARGET=libgosam3-fallback.so CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) libgosam3-custom
+	rm -rfv build*
+
+# Configure and build the wrapper in build-$(SO_TARGET), then move the
+# resulting libgosam3.so next to this Makefile under the name $(SO_TARGET).
+# Expects SO_TARGET (and any extra CMAKE_ARGS) to be supplied by the caller,
+# as the variant rules do.
+libgosam3-custom: CMakeLists.txt gosam3.cpp gosam3.h
+	mkdir -p build-$(SO_TARGET) && \
+	cd build-$(SO_TARGET) && \
+	cmake .. $(CMAKE_ARGS) && \
+	cmake --build . --config Release -j$(JOBS) && \
+	cd .. && \
+	mv build-$(SO_TARGET)/libgosam3.so ./$(SO_TARGET)
+
+all: sam3-cpp package
--- a/backend/go/sam3-cpp/gosam3.cpp
+++ b/backend/go/sam3-cpp/gosam3.cpp
@@ -0,0 +1,193 @@
+#include "sam3.h"
+#include "gosam3.h"
+
+#include <cstdio>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#define STB_IMAGE_WRITE_STATIC
+#include "stb_image_write.h"
+
+// Static state
+static std::shared_ptr<sam3_model> g_model;
+static sam3_state_ptr g_state;
+static sam3_result g_result;
+static std::vector<std::vector<unsigned char>> g_mask_pngs;
+
+// Write sink handed to stbi_write_png_to_func: `context` is the
+// std::vector<unsigned char> that collects the encoded PNG, and every call
+// appends `size` bytes from `data` to its end.
+static void png_write_callback(void *context, void *data, int size) {
+    auto &out = *static_cast<std::vector<unsigned char>*>(context);
+    const auto *first = static_cast<const unsigned char*>(data);
+    out.insert(out.end(), first, first + size);
+}
+
+// Rebuild g_mask_pngs from g_result: slot i receives the PNG encoding of the
+// mask of detection i. A detection whose mask has no pixels keeps an empty
+// buffer, so both containers always have the same length.
+static void encode_masks_as_png() {
+    const size_t n_detections = g_result.detections.size();
+    g_mask_pngs.assign(n_detections, std::vector<unsigned char>());
+
+    for (size_t idx = 0; idx < n_detections; ++idx) {
+        const auto &mask = g_result.detections[idx].mask;
+        const bool has_pixels =
+            mask.width > 0 && mask.height > 0 && !mask.data.empty();
+        if (!has_pixels) {
+            continue;
+        }
+
+        // One component per pixel; the row stride passed is the mask width
+        stbi_write_png_to_func(png_write_callback, &g_mask_pngs[idx],
+                               mask.width, mask.height, 1,
+                               mask.data.data(), mask.width);
+    }
+}
+
+extern "C" {
+
+// Load the model at `model_path` and create the inference state used by the
+// encode/segment calls. GPU use is always requested.
+//
+// Returns 0 on success, 1 when the path is NULL or the model cannot be
+// loaded, 2 when the model loaded but the state could not be created (the
+// model is released again in that case).
+int sam3_cpp_load_model(const char *model_path, int threads) {
+    // A NULL path can never be loaded and must not reach the params
+    // assignment or the %s formats below.
+    if (model_path == nullptr) {
+        fprintf(stderr, "[sam3-cpp] Failed to load model: no model path given\n");
+        return 1;
+    }
+
+    sam3_params params;
+    params.model_path = model_path;
+    params.n_threads = threads;
+    params.use_gpu = true;
+
+    g_model = sam3_load_model(params);
+    if (!g_model) {
+        fprintf(stderr, "[sam3-cpp] Failed to load model: %s\n", model_path);
+        return 1;
+    }
+
+    g_state = sam3_create_state(*g_model, params);
+    if (!g_state) {
+        fprintf(stderr, "[sam3-cpp] Failed to create state\n");
+        g_model.reset();
+        return 2;
+    }
+
+    fprintf(stderr, "[sam3-cpp] Model loaded: %s (threads=%d)\n", model_path, threads);
+    return 0;
+}
+
+// Load the image at `image_path` and run the image encoder on it, storing the
+// result in the global state for the following segment call.
+//
+// Returns 0 on success, 1 when no model is loaded, 2 when the path is NULL or
+// the image cannot be loaded, 3 when encoding fails.
+int sam3_cpp_encode_image(const char *image_path) {
+    if (!g_model || !g_state) {
+        fprintf(stderr, "[sam3-cpp] Model not loaded\n");
+        return 1;
+    }
+
+    // Reject a NULL path before it reaches the loader or the %s format
+    if (image_path == nullptr) {
+        fprintf(stderr, "[sam3-cpp] Failed to load image: no image path given\n");
+        return 2;
+    }
+
+    sam3_image img = sam3_load_image(image_path);
+    if (img.data.empty()) {
+        fprintf(stderr, "[sam3-cpp] Failed to load image: %s\n", image_path);
+        return 2;
+    }
+
+    if (!sam3_encode_image(*g_state, *g_model, img)) {
+        fprintf(stderr, "[sam3-cpp] Failed to encode image\n");
+        return 3;
+    }
+
+    return 0;
+}
+
+// Run point/box-prompted segmentation (PVS mode) and cache the detections and
+// their PNG-encoded masks for the accessor functions.
+//
+// points:    n_point_triples [x, y, label] triples; label > 0.5 marks a
+//            positive point, anything else a negative one.
+// boxes:     n_box_quads [x1, y1, x2, y2] quads; only the first quad is used.
+// threshold: part of the C interface but not consumed by this mode.
+//
+// Returns the number of detections, or -1 when no model is loaded or a prompt
+// array with a positive count is passed as NULL.
+int sam3_cpp_segment_pvs(float *points, int n_point_triples,
+                         float *boxes, int n_box_quads,
+                         float threshold) {
+    (void)threshold; // not used in PVS mode; silences unused-parameter warnings
+
+    if (!g_model || !g_state) {
+        return -1;
+    }
+
+    // A positive count with a NULL array would be dereferenced below
+    if ((n_point_triples > 0 && points == nullptr) ||
+        (n_box_quads > 0 && boxes == nullptr)) {
+        fprintf(stderr, "[sam3-cpp] PVS prompt array is NULL\n");
+        return -1;
+    }
+
+    sam3_pvs_params pvs_params;
+
+    // Parse points: each triple is [x, y, label]
+    for (int i = 0; i < n_point_triples; i++) {
+        const float *triple = points + i * 3;
+        sam3_point pt = {triple[0], triple[1]};
+        if (triple[2] > 0.5f) {
+            pvs_params.pos_points.push_back(pt);
+        } else {
+            pvs_params.neg_points.push_back(pt);
+        }
+    }
+
+    // Parse boxes: each quad is [x1, y1, x2, y2], use only first box
+    if (n_box_quads > 0) {
+        pvs_params.box = {boxes[0], boxes[1], boxes[2], boxes[3]};
+        pvs_params.use_box = true;
+    }
+
+    g_result = sam3_segment_pvs(*g_state, *g_model, pvs_params);
+    encode_masks_as_png();
+
+    return static_cast<int>(g_result.detections.size());
+}
+
+// Run text-prompted segmentation (PCS mode) and cache the detections and
+// their PNG-encoded masks for the accessor functions.
+//
+// text_prompt: the text describing what to segment; must not be NULL.
+// threshold:   score threshold; values <= 0 select the default of 0.5.
+//
+// Returns the number of detections, or -1 when no model is loaded, the prompt
+// is NULL, or the loaded model is not a full SAM 3 model.
+int sam3_cpp_segment_pcs(const char *text_prompt, float threshold) {
+    if (!g_model || !g_state) {
+        return -1;
+    }
+
+    // Reject a NULL prompt before it is assigned into the params below
+    if (text_prompt == nullptr) {
+        fprintf(stderr, "[sam3-cpp] PCS mode requires a text prompt\n");
+        return -1;
+    }
+
+    // PCS mode requires SAM 3 (full model with text encoder)
+    if (sam3_is_visual_only(*g_model) ||
+        sam3_get_model_type(*g_model) != SAM3_MODEL_SAM3) {
+        fprintf(stderr, "[sam3-cpp] PCS mode requires full SAM 3 model\n");
+        return -1;
+    }
+
+    sam3_pcs_params pcs_params;
+    pcs_params.text_prompt = text_prompt;
+    pcs_params.score_threshold = threshold > 0 ? threshold : 0.5f;
+
+    g_result = sam3_segment_pcs(*g_state, *g_model, pcs_params);
+    encode_masks_as_png();
+
+    return static_cast<int>(g_result.detections.size());
+}
+
+// Number of detections currently held in g_result.
+int sam3_cpp_get_n_detections(void) {
+    const auto count = g_result.detections.size();
+    return static_cast<int>(count);
+}
+
+// True when i indexes an existing entry of g_result.detections.
+static bool detection_index_ok(int i) {
+    return i >= 0 && i < static_cast<int>(g_result.detections.size());
+}
+
+// Left edge (x0) of the box of detection i; 0 for an invalid index.
+float sam3_cpp_get_detection_x(int i) {
+    if (!detection_index_ok(i)) {
+        return 0.0f;
+    }
+    return g_result.detections[i].box.x0;
+}
+
+// Top edge (y0) of the box of detection i; 0 for an invalid index.
+float sam3_cpp_get_detection_y(int i) {
+    if (!detection_index_ok(i)) {
+        return 0.0f;
+    }
+    return g_result.detections[i].box.y0;
+}
+
+// Box width (x1 - x0) of detection i; 0 for an invalid index.
+float sam3_cpp_get_detection_w(int i) {
+    if (!detection_index_ok(i)) {
+        return 0.0f;
+    }
+    const auto &bounds = g_result.detections[i].box;
+    return bounds.x1 - bounds.x0;
+}
+
+// Box height (y1 - y0) of detection i; 0 for an invalid index.
+float sam3_cpp_get_detection_h(int i) {
+    if (!detection_index_ok(i)) {
+        return 0.0f;
+    }
+    const auto &bounds = g_result.detections[i].box;
+    return bounds.y1 - bounds.y0;
+}
+
+// Score of detection i; 0 for an invalid index.
+float sam3_cpp_get_detection_score(int i) {
+    if (!detection_index_ok(i)) {
+        return 0.0f;
+    }
+    return g_result.detections[i].score;
+}
+
+// Copy the PNG-encoded mask of detection i into `buf`.
+//
+// With buf == NULL nothing is copied and the size of the PNG in bytes is
+// returned, so callers can size their buffer first. Otherwise at most
+// buf_size bytes are written and the number of bytes written is returned.
+// Returns 0 for an invalid index, an empty mask, or a non-positive buf_size.
+int sam3_cpp_get_detection_mask_png(int i, unsigned char *buf, int buf_size) {
+    if (i < 0 || i >= static_cast<int>(g_mask_pngs.size())) return 0;
+
+    const auto &png = g_mask_pngs[i];
+    const int size = static_cast<int>(png.size());
+
+    if (buf == nullptr) {
+        return size;
+    }
+
+    // A negative buf_size would turn into a huge size_t inside memcpy, and an
+    // empty mask has no valid source pointer to copy from.
+    if (buf_size <= 0 || size == 0) {
+        return 0;
+    }
+
+    const int to_copy = size < buf_size ? size : buf_size;
+    memcpy(buf, png.data(), static_cast<size_t>(to_copy));
+    return to_copy;
+}
+
+// Drop the cached detections and their encoded masks. After this call
+// sam3_cpp_get_n_detections() reports 0.
+void sam3_cpp_free_results(void) {
+    g_mask_pngs.clear();
+    g_result.detections.clear();
+}
+
+} // extern "C"
--- a/backend/go/sam3-cpp/gosam3.go
+++ b/backend/go/sam3-cpp/gosam3.go
@@ -0,0 +1,143 @@
+package main
+
+import (
+	"encoding/base64"
+	"fmt"
+	"os"
+	"path/filepath"
+	"unsafe"
+
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+)
+
+// SAM3 is the backend implementation that main hands to grpc.StartServer.
+// It carries no fields of its own: the native library keeps the loaded model
+// and the latest results in process-wide state. It embeds base.SingleThread,
+// presumably so that requests are served one at a time — confirm against
+// pkg/grpc/base.
+type SAM3 struct {
+	base.SingleThread
+}
+
+// Bindings to the C entry points declared in gosam3.h. All of them are nil
+// until main registers them against the loaded shared library.
+
+// Model loading and image encoding; both return 0 on success.
+var CppLoadModel func(modelPath string, threads int) int
+var CppEncodeImage func(imagePath string) int
+
+// Segmentation calls; both return the number of detections, or -1 on error.
+var CppSegmentPVS func(points uintptr, nPointTriples int, boxes uintptr, nBoxQuads int, threshold float32) int
+var CppSegmentPCS func(textPrompt string, threshold float32) int
+
+// Accessors for the results of the latest segmentation call.
+var CppGetNDetections func() int
+var CppGetDetectionX func(i int) float32
+var CppGetDetectionY func(i int) float32
+var CppGetDetectionW func(i int) float32
+var CppGetDetectionH func(i int) float32
+var CppGetDetectionScore func(i int) float32
+
+// With buf == 0 this returns the PNG size; otherwise it copies up to bufSize
+// bytes into buf and returns the number of bytes written.
+var CppGetDetectionMaskPNG func(i int, buf uintptr, bufSize int) int
+
+// Releases the results held by the native library.
+var CppFreeResults func()
+
+func (s *SAM3) Load(opts *pb.ModelOptions) error {
+	modelFile := opts.ModelFile
+	if modelFile == "" {
+		modelFile = opts.Model
+	}
+
+	var modelPath string
+	if filepath.IsAbs(modelFile) {
+		modelPath = modelFile
+	} else {
+		modelPath = filepath.Join(opts.ModelPath, modelFile)
+	}
+
+	threads := int(opts.Threads)
+	if threads <= 0 {
+		threads = 4
+	}
+
+	ret := CppLoadModel(modelPath, threads)
+	if ret != 0 {
+		return fmt.Errorf("failed to load SAM3 model (error %d): %s", ret, modelPath)
+	}
+
+	return nil
+}
+
+// Detect segments the base64-encoded image in opts.Src and returns one
+// detection per segment, each with its bounding box, score and PNG mask.
+//
+// The image is written to a temporary file because the native library loads
+// images by path. A non-empty opts.Prompt selects text-prompted segmentation;
+// otherwise opts.Points ([x, y, label] triples) and opts.Boxes
+// ([x1, y1, x2, y2] quads) are used, with trailing values that do not form a
+// complete triple/quad ignored. A threshold <= 0 is replaced by 0.5.
+//
+// An error is returned when the image cannot be decoded, stored or encoded,
+// or when the native segmentation call reports a failure.
+func (s *SAM3) Detect(opts *pb.DetectOptions) (pb.DetectResponse, error) {
+	// Decode base64 image and write to temp file
+	imgData, err := base64.StdEncoding.DecodeString(opts.Src)
+	if err != nil {
+		return pb.DetectResponse{}, fmt.Errorf("failed to decode image: %w", err)
+	}
+
+	tmpFile, err := os.CreateTemp("", "sam3-*.png")
+	if err != nil {
+		return pb.DetectResponse{}, fmt.Errorf("failed to create temp file: %w", err)
+	}
+	defer os.Remove(tmpFile.Name())
+
+	// Always close the file, and surface a failed close as well: the native
+	// library reads the file back, so the data must really be on disk.
+	_, writeErr := tmpFile.Write(imgData)
+	closeErr := tmpFile.Close()
+	if writeErr != nil {
+		return pb.DetectResponse{}, fmt.Errorf("failed to write temp file: %w", writeErr)
+	}
+	if closeErr != nil {
+		return pb.DetectResponse{}, fmt.Errorf("failed to close temp file: %w", closeErr)
+	}
+
+	// Encode image
+	ret := CppEncodeImage(tmpFile.Name())
+	if ret != 0 {
+		return pb.DetectResponse{}, fmt.Errorf("failed to encode image (error %d)", ret)
+	}
+
+	threshold := opts.Threshold
+	if threshold <= 0 {
+		threshold = 0.5
+	}
+
+	// Determine segmentation mode
+	var nDetections int
+	if opts.Prompt != "" {
+		// Text-prompted segmentation (PCS mode, SAM 3 only)
+		nDetections = CppSegmentPCS(opts.Prompt, threshold)
+	} else {
+		// Point/box-prompted segmentation (PVS mode)
+		var pointsPtr uintptr
+		var boxesPtr uintptr
+		nPointTriples := len(opts.Points) / 3
+		nBoxQuads := len(opts.Boxes) / 4
+
+		if nPointTriples > 0 {
+			pointsPtr = uintptr(unsafe.Pointer(&opts.Points[0]))
+		}
+		if nBoxQuads > 0 {
+			boxesPtr = uintptr(unsafe.Pointer(&opts.Boxes[0]))
+		}
+
+		nDetections = CppSegmentPVS(pointsPtr, nPointTriples, boxesPtr, nBoxQuads, threshold)
+	}
+
+	if nDetections < 0 {
+		return pb.DetectResponse{}, fmt.Errorf("segmentation failed")
+	}
+
+	// The native side keeps the results until told to drop them
+	defer CppFreeResults()
+
+	// Build response
+	detections := make([]*pb.Detection, nDetections)
+	for i := 0; i < nDetections; i++ {
+		det := &pb.Detection{
+			X:          CppGetDetectionX(i),
+			Y:          CppGetDetectionY(i),
+			Width:      CppGetDetectionW(i),
+			Height:     CppGetDetectionH(i),
+			Confidence: CppGetDetectionScore(i),
+			ClassName:  "segment",
+		}
+
+		// Get mask PNG: query the size first, then copy into a buffer of
+		// that size and keep only the bytes that were actually written.
+		if maskSize := CppGetDetectionMaskPNG(i, 0, 0); maskSize > 0 {
+			maskBuf := make([]byte, maskSize)
+			written := CppGetDetectionMaskPNG(i, uintptr(unsafe.Pointer(&maskBuf[0])), maskSize)
+			if written > 0 && written <= maskSize {
+				det.Mask = maskBuf[:written]
+			}
+		}
+
+		detections[i] = det
+	}
+
+	return pb.DetectResponse{
+		Detections: detections,
+	}, nil
+}
--- a/backend/go/sam3-cpp/gosam3.h
+++ b/backend/go/sam3-cpp/gosam3.h
@@ -0,0 +1,51 @@
+#ifndef GOSAM3_H
+#define GOSAM3_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Load model from file. Returns 0 on success, non-zero on failure.
+int sam3_cpp_load_model(const char *model_path, int threads);
+
+// Encode an image from file path. Must be called before segmentation.
+// Returns 0 on success.
+int sam3_cpp_encode_image(const char *image_path);
+
+// Segment with point/box prompts (PVS mode).
+// points: flat array of [x, y, label] triples; a label above 0.5 marks a
+//         positive point, any other value a negative one
+// boxes: flat array of [x1, y1, x2, y2] quads; only the first quad is used
+// threshold: currently not used by the implementation of this mode
+// Returns number of detections, or -1 on error.
+int sam3_cpp_segment_pvs(float *points, int n_point_triples,
+                         float *boxes, int n_box_quads,
+                         float threshold);
+
+// Segment with text prompt (PCS mode, SAM 3 only).
+// Returns number of detections, or -1 on error.
+int sam3_cpp_segment_pcs(const char *text_prompt, float threshold);
+
+// Access detection results (valid after a segment call).
+int sam3_cpp_get_n_detections(void);
+
+// Get bounding box for detection i (as x, y, width, height).
+float sam3_cpp_get_detection_x(int i);
+float sam3_cpp_get_detection_y(int i);
+float sam3_cpp_get_detection_w(int i);
+float sam3_cpp_get_detection_h(int i);
+
+// Get confidence score for detection i.
+float sam3_cpp_get_detection_score(int i);
+
+// Get mask as PNG-encoded bytes.
+// If buf is NULL, returns the required buffer size.
+// Otherwise writes up to buf_size bytes and returns bytes written.
+int sam3_cpp_get_detection_mask_png(int i, unsigned char *buf, int buf_size);
+
+// Free current detection results.
+void sam3_cpp_free_results(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GOSAM3_H
--- a/backend/go/sam3-cpp/main.go
+++ b/backend/go/sam3-cpp/main.go
@@ -0,0 +1,56 @@
+package main
+
+import (
+	"flag"
+	"os"
+
+	"github.com/ebitengine/purego"
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
+)
+
+var (
+	addr = flag.String("addr", "localhost:50051", "the address to connect to")
+)
+
+// LibFuncs pairs a pointer to one of the Cpp* function variables with the
+// name of the C symbol that purego binds to it.
+type LibFuncs struct {
+	FuncPtr any
+	Name    string
+}
+
+// main loads the native sam3 library, binds its exported functions to the
+// Cpp* variables and then serves the backend over gRPC on -addr. Any failure
+// to load the library or start the server panics.
+func main() {
+	// SAM3_LIBRARY selects the shared object to load; when it is unset or
+	// empty the fallback build in the working directory is used.
+	libPath := "./libgosam3-fallback.so"
+	if fromEnv := os.Getenv("SAM3_LIBRARY"); fromEnv != "" {
+		libPath = fromEnv
+	}
+
+	handle, err := purego.Dlopen(libPath, purego.RTLD_NOW|purego.RTLD_GLOBAL)
+	if err != nil {
+		panic(err)
+	}
+
+	// One entry per exported C function, bound in declaration order
+	for _, binding := range []LibFuncs{
+		{FuncPtr: &CppLoadModel, Name: "sam3_cpp_load_model"},
+		{FuncPtr: &CppEncodeImage, Name: "sam3_cpp_encode_image"},
+		{FuncPtr: &CppSegmentPVS, Name: "sam3_cpp_segment_pvs"},
+		{FuncPtr: &CppSegmentPCS, Name: "sam3_cpp_segment_pcs"},
+		{FuncPtr: &CppGetNDetections, Name: "sam3_cpp_get_n_detections"},
+		{FuncPtr: &CppGetDetectionX, Name: "sam3_cpp_get_detection_x"},
+		{FuncPtr: &CppGetDetectionY, Name: "sam3_cpp_get_detection_y"},
+		{FuncPtr: &CppGetDetectionW, Name: "sam3_cpp_get_detection_w"},
+		{FuncPtr: &CppGetDetectionH, Name: "sam3_cpp_get_detection_h"},
+		{FuncPtr: &CppGetDetectionScore, Name: "sam3_cpp_get_detection_score"},
+		{FuncPtr: &CppGetDetectionMaskPNG, Name: "sam3_cpp_get_detection_mask_png"},
+		{FuncPtr: &CppFreeResults, Name: "sam3_cpp_free_results"},
+	} {
+		purego.RegisterLibFunc(binding.FuncPtr, handle, binding.Name)
+	}
+
+	flag.Parse()
+
+	err = grpc.StartServer(*addr, &SAM3{})
+	if err != nil {
+		panic(err)
+	}
+}
--- a/backend/go/sam3-cpp/package.sh
+++ b/backend/go/sam3-cpp/package.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+# Script to copy the appropriate libraries based on architecture
+
+set -e
+
+CURDIR=$(dirname "$(realpath $0)")
+REPO_ROOT="${CURDIR}/../../.."
+
+# Create lib directory
+mkdir -p $CURDIR/package/lib
+
+cp -avf $CURDIR/libgosam3-*.so $CURDIR/package/
+cp -avf $CURDIR/sam3-cpp $CURDIR/package/
+cp -fv $CURDIR/run.sh $CURDIR/package/
+
+# Detect architecture and copy appropriate libraries
+if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
+    # x86_64 architecture
+    echo "Detected x86_64 architecture, copying x86_64 libraries..."
+    cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
+    cp -arfLv /lib/x86_64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
+    cp -arfLv /lib/x86_64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
+    cp -arfLv /lib/x86_64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
+    cp -arfLv /lib/x86_64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
+    cp -arfLv /lib/x86_64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
+    cp -arfLv /lib/x86_64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
+    cp -arfLv /lib/x86_64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
+    cp -arfLv /lib/x86_64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
+elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
+    # ARM64 architecture
+    echo "Detected ARM64 architecture, copying ARM64 libraries..."
+    cp -arfLv /lib/ld-linux-aarch64.so.1 $CURDIR/package/lib/ld.so
+    cp -arfLv /lib/aarch64-linux-gnu/libc.so.6 $CURDIR/package/lib/libc.so.6
+    cp -arfLv /lib/aarch64-linux-gnu/libgcc_s.so.1 $CURDIR/package/lib/libgcc_s.so.1
+    cp -arfLv /lib/aarch64-linux-gnu/libstdc++.so.6 $CURDIR/package/lib/libstdc++.so.6
+    cp -arfLv /lib/aarch64-linux-gnu/libm.so.6 $CURDIR/package/lib/libm.so.6
+    cp -arfLv /lib/aarch64-linux-gnu/libgomp.so.1 $CURDIR/package/lib/libgomp.so.1
+    cp -arfLv /lib/aarch64-linux-gnu/libdl.so.2 $CURDIR/package/lib/libdl.so.2
+    cp -arfLv /lib/aarch64-linux-gnu/librt.so.1 $CURDIR/package/lib/librt.so.1
+    cp -arfLv /lib/aarch64-linux-gnu/libpthread.so.0 $CURDIR/package/lib/libpthread.so.0
+elif [ $(uname -s) = "Darwin" ]; then
+    echo "Detected Darwin"
+else
+    echo "Error: Could not detect architecture"
+    exit 1
+fi
+
+# Package GPU libraries based on BUILD_TYPE
+GPU_LIB_SCRIPT="${REPO_ROOT}/scripts/build/package-gpu-libs.sh"
+if [ -f "$GPU_LIB_SCRIPT" ]; then
+    echo "Packaging GPU libraries for BUILD_TYPE=${BUILD_TYPE:-cpu}..."
+    source "$GPU_LIB_SCRIPT" "$CURDIR/package/lib"
+    package_gpu_libs
+fi
+
+echo "Packaging completed successfully"
+ls -liah $CURDIR/package/
+ls -liah $CURDIR/package/lib/
--- a/backend/go/sam3-cpp/run.sh
+++ b/backend/go/sam3-cpp/run.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+# Launcher: exports the best libgosam3 build for this CPU as SAM3_LIBRARY, then execs sam3-cpp.
+set -ex
+
+# Get the absolute current dir where the script is located
+CURDIR=$(dirname "$(realpath "$0")")
+
+cd /
+
+LIBRARY="$CURDIR/libgosam3-fallback.so"
+
+echo "CPU info:"
+if [ "$(uname)" != "Darwin" ]; then
+	grep -e "model\sname" /proc/cpuinfo | head -1
+	grep -e "flags" /proc/cpuinfo | head -1
+
+	# Probe narrowest to widest so the last variant that is both supported and shipped wins
+	if grep -q -e "\savx\s" /proc/cpuinfo ; then
+		echo "CPU:    AVX    found OK"
+		if [ -e "$CURDIR/libgosam3-avx.so" ]; then
+			LIBRARY="$CURDIR/libgosam3-avx.so"
+		fi
+	fi
+
+	if grep -q -e "\savx2\s" /proc/cpuinfo ; then
+		echo "CPU:    AVX2   found OK"
+		if [ -e "$CURDIR/libgosam3-avx2.so" ]; then
+			LIBRARY="$CURDIR/libgosam3-avx2.so"
+		fi
+	fi
+
+	if grep -q -e "\savx512f\s" /proc/cpuinfo ; then
+		echo "CPU:    AVX512F found OK"
+		if [ -e "$CURDIR/libgosam3-avx512.so" ]; then
+			LIBRARY="$CURDIR/libgosam3-avx512.so"
+		fi
+	fi
+fi
+
+# Append the inherited path only when set: an empty entry would add the cwd to the search path
+export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+export SAM3_LIBRARY="$LIBRARY"
+
+# If there is a lib/ld.so, use it
+if [ -f "$CURDIR/lib/ld.so" ]; then
+	echo "Using lib/ld.so"
+	echo "Using library: $LIBRARY"
+	exec "$CURDIR/lib/ld.so" "$CURDIR/sam3-cpp" "$@"
+fi
+
+echo "Using library: $LIBRARY"
+exec "$CURDIR/sam3-cpp" "$@"
--- a/backend/go/sam3-cpp/test.sh
+++ b/backend/go/sam3-cpp/test.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+set -e
+
+CURDIR=$(dirname "$(realpath "$0")")
+
+echo "Running sam3-cpp backend tests..."
+
+# The test requires a SAM model in GGML format.
+# Uses EdgeTAM Q4_0 (~15MB) for fast CI testing.
+SAM3_MODEL_DIR="${SAM3_MODEL_DIR:-$CURDIR/test-models}"
+SAM3_MODEL_FILE="${SAM3_MODEL_FILE:-edgetam_q4_0.ggml}"
+SAM3_MODEL_URL="${SAM3_MODEL_URL:-https://huggingface.co/PABannier/sam3.cpp/resolve/main/edgetam_q4_0.ggml}"
+
+# Download model if not present. --fail makes HTTP errors abort the script, and the
+# temp-file + rename keeps a failed or partial download from passing as a cached model.
+if [ ! -f "$SAM3_MODEL_DIR/$SAM3_MODEL_FILE" ]; then
+    echo "Downloading EdgeTAM Q4_0 model for testing..."
+    mkdir -p "$SAM3_MODEL_DIR"
+    curl --fail -L -o "$SAM3_MODEL_DIR/$SAM3_MODEL_FILE.part" "$SAM3_MODEL_URL" --progress-bar
+    mv "$SAM3_MODEL_DIR/$SAM3_MODEL_FILE.part" "$SAM3_MODEL_DIR/$SAM3_MODEL_FILE"
+    echo "Model downloaded."
+fi
+
+# Create a test image (64x64 solid red PNG): a minimal valid PNG for testing the pipeline
+TEST_IMAGE_DIR="$CURDIR/test-data"
+mkdir -p "$TEST_IMAGE_DIR"
+
+# Generate the image with Python when available; the path goes through argv, not the source text
+if command -v python3 &> /dev/null; then
+    python3 -c "
+import struct, sys, zlib
+def create_png(width, height, r, g, b):
+    # Each scanline is a filter byte (0 = none) followed by width RGB pixels
+    raw = (b'\x00' + bytes([r, g, b]) * width) * height
+    def chunk(ctype, data):
+        c = ctype + data
+        return struct.pack('>I', len(data)) + c + struct.pack('>I', zlib.crc32(c) & 0xffffffff)
+    ihdr = struct.pack('>IIBBBBB', width, height, 8, 2, 0, 0, 0)
+    return b'\x89PNG\r\n\x1a\n' + chunk(b'IHDR', ihdr) + chunk(b'IDAT', zlib.compress(raw)) + chunk(b'IEND', b'')
+with open(sys.argv[1], 'wb') as f:
+    f.write(create_png(64, 64, 255, 0, 0))
+" "$TEST_IMAGE_DIR/test.png"
+    echo "Test image created."
+else
+    echo "python3 not found; skipping test image generation." >&2
+fi
+
+echo "sam3-cpp test setup complete."
+echo "Model: $SAM3_MODEL_DIR/$SAM3_MODEL_FILE"
+echo "Note: Full integration tests run via the LocalAI test-extra target."
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -125,6 +125,31 @@
    nvidia-cuda-13: "cuda13-rfdetr"
    nvidia-cuda-12: "cuda12-rfdetr"
    nvidia-l4t-cuda-12: "nvidia-l4t-arm64-rfdetr"
+- &sam3cpp
+  name: "sam3-cpp"
+  alias: "sam3-cpp"
+  license: mit
+  description: |
+    Segment Anything Model (SAM 3/2/EdgeTAM) in C/C++ using GGML.
+    Supports text-prompted and point/box-prompted image segmentation.
+  urls:
+    - https://github.com/PABannier/sam3.cpp
+  tags:
+    - image-segmentation
+    - object-detection
+    - sam3
+    - gpu
+    - cpu
+  capabilities:  # each value is the `name` of a concrete sam3-cpp entry in this file
+    default: "cpu-sam3-cpp"
+    nvidia: "cuda12-sam3-cpp"  # same target as nvidia-cuda-12
+    nvidia-cuda-12: "cuda12-sam3-cpp"
+    nvidia-cuda-13: "cuda13-sam3-cpp"
+    nvidia-l4t: "nvidia-l4t-arm64-sam3-cpp"  # same target as nvidia-l4t-cuda-12
+    nvidia-l4t-cuda-12: "nvidia-l4t-arm64-sam3-cpp"
+    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-sam3-cpp"
+    intel: "intel-sycl-f32-sam3-cpp"
+    vulkan: "vulkan-sam3-cpp"
 - &vllm
  name: "vllm"
  license: apache-2.0
@@ -1628,6 +1653,89 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-rfdetr"
  mirrors:
    - localai/localai-backends:master-metal-darwin-arm64-rfdetr
+## sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "sam3-cpp-development"
+  capabilities:
+    default: "cpu-sam3-cpp-development"
+    nvidia: "cuda12-sam3-cpp-development"
+    nvidia-cuda-12: "cuda12-sam3-cpp-development"
+    nvidia-cuda-13: "cuda13-sam3-cpp-development"
+    nvidia-l4t: "nvidia-l4t-arm64-sam3-cpp-development"
+    nvidia-l4t-cuda-12: "nvidia-l4t-arm64-sam3-cpp-development"
+    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-sam3-cpp-development"
+    intel: "intel-sycl-f32-sam3-cpp-development"
+    vulkan: "vulkan-sam3-cpp-development"
+- !!merge <<: *sam3cpp
+  name: "cpu-sam3-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:latest-cpu-sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "cpu-sam3-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:master-cpu-sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "cuda12-sam3-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-cuda-12-sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "cuda12-sam3-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:master-gpu-nvidia-cuda-12-sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "cuda13-sam3-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-cuda-13-sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "cuda13-sam3-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:master-gpu-nvidia-cuda-13-sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "nvidia-l4t-arm64-sam3-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:latest-nvidia-l4t-arm64-sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "nvidia-l4t-arm64-sam3-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:master-nvidia-l4t-arm64-sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "cuda13-nvidia-l4t-arm64-sam3-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "cuda13-nvidia-l4t-arm64-sam3-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "intel-sycl-f32-sam3-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:latest-gpu-intel-sycl-f32-sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "intel-sycl-f32-sam3-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:master-gpu-intel-sycl-f32-sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "vulkan-sam3-cpp"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:latest-gpu-vulkan-sam3-cpp
+- !!merge <<: *sam3cpp
+  name: "vulkan-sam3-cpp-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-vulkan-sam3-cpp"
+  mirrors:
+    - localai/localai-backends:master-gpu-vulkan-sam3-cpp
 ## Rerankers
 - !!merge <<: *rerankers
  name: "rerankers-development"
--- a/core/backend/detection.go
+++ b/core/backend/detection.go
@@ -13,6 +13,10 @@ import (

 func Detection(
 	sourceFile string,
+	prompt string,
+	points []float32,
+	boxes []float32,
+	threshold float32,
 	loader *model.ModelLoader,
 	appConfig *config.ApplicationConfig,
 	modelConfig config.ModelConfig,
@@ -35,7 +39,11 @@ func Detection(
 	}

 	res, err := detectionModel.Detect(context.Background(), &proto.DetectOptions{
-		Src: sourceFile,
+		Src:       sourceFile,
+		Prompt:    prompt,
+		Points:    points,
+		Boxes:     boxes,
+		Threshold: threshold,
 	})

 	if appConfig.EnableTracing {
--- a/core/config/model_config.go
+++ b/core/config/model_config.go
@@ -705,7 +705,8 @@ func (c *ModelConfig) GuessUsecases(u ModelConfigUsecase) bool {
 	}

 	if (u & FLAG_DETECTION) == FLAG_DETECTION {
-		if c.Backend != "rfdetr" {
+		detectionBackends := []string{"rfdetr", "sam3-cpp"}
+		if !slices.Contains(detectionBackends, c.Backend) {
 			return false
 		}
 	}
--- a/core/http/endpoints/localai/detection.go
+++ b/core/http/endpoints/localai/detection.go
@@ -1,6 +1,8 @@
 package localai

 import (
+	"encoding/base64"
+
 	"github.com/labstack/echo/v4"
 	"github.com/mudler/LocalAI/core/backend"
 	"github.com/mudler/LocalAI/core/config"
@@ -37,7 +39,7 @@ func DetectionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appC
 			return err
 		}

-		res, err := backend.Detection(image, ml, appConfig, *cfg)
+		res, err := backend.Detection(image, input.Prompt, input.Points, input.Boxes, input.Threshold, ml, appConfig, *cfg)
 		if err != nil {
 			return err
 		}
@@ -46,12 +48,18 @@ func DetectionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appC
 			Detections: make([]schema.Detection, len(res.Detections)),
 		}
 		for i, detection := range res.Detections {
+			var mask string
+			if len(detection.Mask) > 0 {
+				mask = base64.StdEncoding.EncodeToString(detection.Mask)
+			}
 			response.Detections[i] = schema.Detection{
-				X:         detection.X,
-				Y:         detection.Y,
-				Width:     detection.Width,
-				Height:    detection.Height,
-				ClassName: detection.ClassName,
+				X:          detection.X,
+				Y:          detection.Y,
+				Width:      detection.Width,
+				Height:     detection.Height,
+				ClassName:  detection.ClassName,
+				Confidence: detection.Confidence,
+				Mask:       mask,
 			}
 		}

--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@@ -152,7 +152,11 @@ type SystemInformationResponse struct {

 type DetectionRequest struct {
 	BasicModelRequest
-	Image string `json:"image"` // URL or base64-encoded image to analyze
+	Image     string    `json:"image"`               // URL or base64-encoded image to analyze
+	Prompt    string    `json:"prompt,omitempty"`    // Text prompt (for SAM 3 PCS mode)
+	Points    []float32 `json:"points,omitempty"`    // Point coordinates as [x,y,label,...] triples (label: 1=pos, 0=neg)
+	Boxes     []float32 `json:"boxes,omitempty"`     // Box coordinates as [x1,y1,x2,y2,...] quads
+	Threshold float32   `json:"threshold,omitempty"` // Detection confidence threshold
 }

 type DetectionResponse struct {
@@ -160,11 +164,13 @@ type DetectionResponse struct {
 }

 type Detection struct {
-	X         float32 `json:"x"`
-	Y         float32 `json:"y"`
-	Width     float32 `json:"width"`
-	Height    float32 `json:"height"`
-	ClassName string  `json:"class_name"`
+	X          float32 `json:"x"`
+	Y          float32 `json:"y"`
+	Width      float32 `json:"width"`
+	Height     float32 `json:"height"`
+	ClassName  string  `json:"class_name"`
+	Confidence float32 `json:"confidence,omitempty"`
+	Mask       string  `json:"mask,omitempty"` // base64-encoded PNG segmentation mask
 }

 type ImportModelRequest struct {
--- a/docs/content/features/object-detection.md
+++ b/docs/content/features/object-detection.md
@@ -5,7 +5,7 @@ weight = 13
 url = "/features/object-detection/"
 +++

-LocalAI supports object detection through various backends. This feature allows you to identify and locate objects within images with high accuracy and real-time performance. Currently, [RF-DETR](https://github.com/roboflow/rf-detr) is available as an implementation.
+LocalAI supports object detection and image segmentation through various backends. This feature allows you to identify and locate objects within images with high accuracy and real-time performance. Available backends include [RF-DETR](https://github.com/roboflow/rf-detr) for object detection and [sam3.cpp](https://github.com/PABannier/sam3.cpp) for image segmentation (SAM 3/2/EdgeTAM).

 ## Overview

@@ -14,6 +14,8 @@ Object detection in LocalAI is implemented through dedicated backends that can i
 **Key Features:**
 - Real-time object detection
 - High accuracy detection with bounding boxes
+- Image segmentation with binary masks (SAM backends)
+- Text-prompted, point-prompted, and box-prompted segmentation
 - Support for multiple hardware accelerators (CPU, NVIDIA GPU, Intel GPU, AMD GPU)
 - Structured detection results with confidence scores
 - Easy integration through the `/v1/detection` endpoint
@@ -45,6 +47,10 @@ The request body should contain:
 - `image`: The image to analyze, which can be:
  - A URL to an image
  - A base64-encoded image
+- `prompt` (optional): Text prompt for text-prompted segmentation (SAM 3 only)
+- `points` (optional): Point coordinates as `[x, y, label, ...]` triples (label: 1=positive, 0=negative)
+- `boxes` (optional): Box coordinates as `[x1, y1, x2, y2, ...]` quads
+- `threshold` (optional): Detection confidence threshold (default: 0.5)

 ### Response Format

@@ -78,6 +84,7 @@ Each detection includes:
 - `width`, `height`: Dimensions of the bounding box
 - `confidence`: Detection confidence score (0.0 to 1.0)
 - `class_name`: The detected object class
+- `mask` (optional): Base64-encoded PNG binary segmentation mask (SAM backends only)

 ## Backends

@@ -123,6 +130,76 @@ Currently, the following model is available in the [Model Gallery]({{%relref "fe

 You can browse and install this model through the LocalAI web interface or using the command line.

+### SAM3 Backend (sam3-cpp)
+
+The sam3-cpp backend provides image segmentation using [sam3.cpp](https://github.com/PABannier/sam3.cpp), a portable C++ implementation of Meta's Segment Anything Model. It supports multiple model architectures:
+
+- **SAM 3**: Full model with text encoder for text-prompted detection and segmentation
+- **SAM 2 / SAM 2.1**: Hiera backbone models in multiple sizes
+- **SAM 3 Visual-Only**: Point/box segmentation without text encoder
+- **EdgeTAM**: Ultra-efficient mobile variant (~15MB quantized)
+
+#### Setup
+
+1. **Manual Configuration**
+
+   Create a model configuration file in your `models` directory:
+
+   ```yaml
+   name: sam3
+   backend: sam3-cpp
+   parameters:
+     model: edgetam_q4_0.ggml
+     threads: 4
+   known_usecases:
+     - detection
+   ```
+
+   Download the model from [Hugging Face](https://huggingface.co/PABannier/sam3.cpp).
+
+#### Segmentation Modes
+
+**Point-prompted segmentation** (all models):
+
+```bash
+curl -X POST http://localhost:8080/v1/detection \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "sam3",
+    "image": "data:image/jpeg;base64,...",
+    "points": [256.0, 256.0, 1.0],
+    "threshold": 0.5
+  }'
+```
+
+**Box-prompted segmentation** (all models):
+
+```bash
+curl -X POST http://localhost:8080/v1/detection \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "sam3",
+    "image": "data:image/jpeg;base64,...",
+    "boxes": [100.0, 100.0, 400.0, 400.0],
+    "threshold": 0.5
+  }'
+```
+
+**Text-prompted segmentation** (SAM 3 full model only):
+
+```bash
+curl -X POST http://localhost:8080/v1/detection \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "sam3",
+    "image": "data:image/jpeg;base64,...",
+    "prompt": "cat",
+    "threshold": 0.5
+  }'
+```
+
+The response includes segmentation masks as base64-encoded PNGs in the `mask` field of each detection.
+
 ## Examples

 ### Basic Object Detection
@@ -180,6 +257,7 @@ local-ai run --debug rfdetr-base
 LocalAI includes a dedicated **object-detection** category for models and backends that specialize in identifying and locating objects within images. This category currently includes:

 - **RF-DETR**: Real-time transformer-based object detection
+- **sam3-cpp**: SAM 3/2/EdgeTAM image segmentation

 Additional object detection models and backends will be added to this category in the future. You can filter models by the `object-detection` tag in the model gallery to find all available object detection models.

--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -3134,6 +3134,37 @@
      model: rfdetr-base
    known_usecases:
      - detection
+- &sam3cpp
+  name: "edgetam"
+  url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
+  size: "16MB"
+  license: apache-2.0
+  description: |
+    EdgeTAM is an ultra-efficient variant of the Segment Anything Model (SAM) for image segmentation.
+    It uses a RepViT backbone and is only ~16MB quantized (Q4_0), making it ideal for edge deployment.
+    Supports point-prompted and box-prompted image segmentation via the /v1/detection endpoint.
+    Powered by sam3.cpp (C/C++ with GGML).
+  tags:
+    - image-segmentation
+    - object-detection
+    - sam3
+    - edgetam
+    - cpu
+    - gpu
+  urls:
+    - https://github.com/PABannier/sam3.cpp
+    - https://huggingface.co/PABannier/sam3.cpp
+  overrides:
+    backend: sam3-cpp
+    parameters:
+      model: edgetam_q4_0.ggml
+      threads: 4
+    known_usecases:
+      - detection
+  files:
+    - filename: edgetam_q4_0.ggml
+      sha256: a8a35e35fb9a1b6f099c3f35e3024548b0fc979c2a4184642562804192496e09
+      uri: huggingface://PABannier/sam3.cpp/edgetam_q4_0.ggml
 - name: "dream-org_dream-v0-instruct-7b"
  # chatml
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"