Files
LocalAI/backend/go/rfdetr-cpp/gorfdetrcpp.go
LocalAI [bot] 7a4ca8f60d feat(backend): rfdetr-cpp native object detection + segmentation backend (#10028)
Adds a Go native gRPC backend that dlopens librfdetrcpp.so (built from
mudler/rf-detr.cpp at the pinned RFDETR_VERSION) via purego and exposes
the rfdetr.cpp inference pipeline through LocalAI's existing Detect RPC.

Supports all 5 RF-DETR detection variants (Nano/Small/Base/Medium/Large)
and 6 segmentation variants (SegNano/SegSmall/SegMedium/SegLarge/
SegXLarge/Seg2XLarge) with F32/F16/Q8_0/Q4_K quantizations. Pre-built
GGUFs ship at mudler/rfdetr-cpp-* on HuggingFace.

Detection returns Bbox + class_name + confidence; segmentation also
returns PNG-encoded per-detection masks via the rfdetr_capi accessor
functions (rfdetr_capi_get_detection_{class_id,box,score,class_name,
mask_png}).

End-to-end verified through POST /v1/detection: HTTP -> gRPC -> purego
dlopen -> rfdetr.cpp -> ggml -> response (9 detections on the detection
model, 21 detections + valid PNG masks on the seg-nano model against
the kitchen fixture).

Wiring:
  - backend/go/rfdetr-cpp/{main.go,gorfdetrcpp.go,CMakeLists.txt,
    Makefile,run.sh,package.sh,test.sh,.gitignore}
  - Top-level Makefile: BACKEND_RFDETR_CPP, docker-build target,
    .NOTPARALLEL, prepare-test-extra, test-extra
  - backend/go/rfdetr-cpp/Makefile: `test` target invoked by test-extra
  - .github/backend-matrix.yml: CPU + CUDA-12/13 + L4T CUDA-12/13
    (arm64) + HIP + Vulkan (amd64 + arm64) + SYCL f32/f16
  - backend/index.yaml: rfdetr-cpp meta anchor + latest/development
    image entries for every matrix tag-suffix
  - .github/workflows/bump_deps.yaml: RFDETR_VERSION pin tracking
    (mudler/rf-detr.cpp branch main)
  - gallery/index.yaml: 11 rfdetr-cpp-* entries (nano + 4 detection
    variants + 6 seg variants), all backed by mudler/rfdetr-cpp-*
    on HuggingFace with sha256 pinning on the F16 default
  - core/gallery/importers/rfdetr.go: GGUF auto-routing for HF imports
    (mudler/rfdetr-cpp-* repos route to rfdetr-cpp, Transformer-format
    repos stay on the Python rfdetr backend; explicit preferences.backend
    overrides both heuristics)
  - core/gallery/importers/rfdetr_test.go: table-driven coverage of the
    auto-routing + a live mudler/rfdetr-cpp-nano cross-check

scripts/changed-backends.js needs no change: the existing
Dockerfile.golang -> backend/go/${item.backend}/ branch already routes
the 9 rfdetr-cpp matrix entries to the correct backend path.

Assisted-by: Claude:claude-opus-4-7 [Claude Code]

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-27 18:43:57 +02:00

196 lines
6.4 KiB
Go

package main
// gorfdetrcpp.go - gRPC handlers (Load, Detect) for the rfdetr-cpp backend.
//
// Embeds base.SingleThread to default unimplemented RPCs to "not supported"
// while we only implement object detection.
import (
"encoding/base64"
"fmt"
"os"
"path/filepath"
"strconv"
"unsafe"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
// Default upper bound on detections returned per image. RF-DETR's decoder
// queries are limited to a few hundred; 300 is a safe ceiling.
const defaultTopK = 300
// rfdetr_handle_t is a uintptr-typed opaque handle (see include/rfdetr_capi.h).
var (
// rfdetr_capi_load(const char* model_path, int n_threads, rfdetr_handle_t* out_handle) -> int
CapiLoad func(modelPath string, nThreads int32, outHandle *uintptr) int32
// rfdetr_capi_unload(rfdetr_handle_t handle) -> int
CapiUnload func(handle uintptr) int32
// rfdetr_capi_detect_path(handle, image_path, threshold, top_k, out_json) -> int
CapiDetectPath func(handle uintptr, imagePath string, threshold float32, topK uint32, outJSON *uintptr) int32
// rfdetr_capi_detect_buffer(handle, bytes, len, threshold, top_k, out_json) -> int
CapiDetectBuffer func(handle uintptr, bytes uintptr, length uintptr, threshold float32, topK uint32, outJSON *uintptr) int32
// rfdetr_capi_free_string(char* s)
CapiFreeString func(s uintptr)
// rfdetr_capi_get_n_detections(handle) -> int
CapiGetNDetections func(handle uintptr) int32
// rfdetr_capi_get_detection_class_id(handle, i) -> int
CapiGetDetectionClassID func(handle uintptr, i int32) int32
// rfdetr_capi_get_detection_box(handle, i, out_xyxy[4]) -> int (0 on success)
CapiGetDetectionBox func(handle uintptr, i int32, outXYXY uintptr) int32
// rfdetr_capi_get_detection_score(handle, i) -> float
CapiGetDetectionScore func(handle uintptr, i int32) float32
// rfdetr_capi_get_detection_class_name(handle, i, buf, buf_size) -> int (needed/written; two-call sizing)
CapiGetDetectionClassName func(handle uintptr, i int32, buf uintptr, bufSize int32) int32
// rfdetr_capi_get_detection_mask_png(handle, i, buf, buf_size) -> int (needed/written; 0 means no mask)
CapiGetDetectionMaskPNG func(handle uintptr, i int32, buf uintptr, bufSize int32) int32
)
type RFDetrCpp struct {
base.SingleThread
handle uintptr
}
// Load loads the GGUF model at opts.ModelFile (joined with opts.ModelPath if relative)
// and stores the handle for later Detect calls.
func (r *RFDetrCpp) Load(opts *pb.ModelOptions) error {
modelFile := opts.ModelFile
if modelFile == "" {
modelFile = opts.Model
}
if modelFile == "" {
return fmt.Errorf("rfdetr-cpp: ModelFile is empty")
}
var modelPath string
if filepath.IsAbs(modelFile) {
modelPath = modelFile
} else {
modelPath = filepath.Join(opts.ModelPath, modelFile)
}
if _, err := os.Stat(modelPath); err != nil {
return fmt.Errorf("rfdetr-cpp: model file not found: %s: %w", modelPath, err)
}
threads := opts.Threads
if threads <= 0 {
threads = 4
}
// Release previous model if any (re-Load).
if r.handle != 0 {
CapiUnload(r.handle)
r.handle = 0
}
var h uintptr
rc := CapiLoad(modelPath, threads, &h)
if rc != 0 || h == 0 {
return fmt.Errorf("rfdetr-cpp: rfdetr_capi_load failed with rc=%d for %s", rc, modelPath)
}
r.handle = h
return nil
}
// Detect runs object detection on the base64-encoded image in opts.Src at
// opts.Threshold, returning one pb.Detection per result. Seg models also
// populate Detection.Mask with PNG-encoded mask bytes.
func (r *RFDetrCpp) Detect(opts *pb.DetectOptions) (pb.DetectResponse, error) {
if r.handle == 0 {
return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: model not loaded")
}
// Decode base64 image and write to temp file.
imgData, err := base64.StdEncoding.DecodeString(opts.Src)
if err != nil {
return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: failed to decode base64 image: %w", err)
}
tmpFile, err := os.CreateTemp("", "rfdetr-*.img")
if err != nil {
return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: failed to create temp file: %w", err)
}
defer func() { _ = os.Remove(tmpFile.Name()) }()
if _, err := tmpFile.Write(imgData); err != nil {
_ = tmpFile.Close()
return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: failed to write temp file: %w", err)
}
if err := tmpFile.Close(); err != nil {
return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: failed to close temp file: %w", err)
}
threshold := opts.Threshold
if threshold <= 0 {
threshold = 0.5
}
// JSON output from detect_path is unused: we read structured detections via
// the accessor functions. Still must free the returned string.
var jsonPtr uintptr
rc := CapiDetectPath(r.handle, tmpFile.Name(), threshold, uint32(defaultTopK), &jsonPtr)
if jsonPtr != 0 {
CapiFreeString(jsonPtr)
}
if rc != 0 {
return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: detect failed with rc=%d", rc)
}
n := CapiGetNDetections(r.handle)
if n < 0 {
return pb.DetectResponse{}, fmt.Errorf("rfdetr-cpp: invalid n_detections=%d", n)
}
detections := make([]*pb.Detection, 0, n)
for i := int32(0); i < n; i++ {
var bbox [4]float32 // x1, y1, x2, y2
if rc := CapiGetDetectionBox(r.handle, i, uintptr(unsafe.Pointer(&bbox[0]))); rc != 0 {
continue
}
cid := CapiGetDetectionClassID(r.handle, i)
score := CapiGetDetectionScore(r.handle, i)
// Two-call sizing for class_name.
var className string
nameSize := CapiGetDetectionClassName(r.handle, i, 0, 0)
if nameSize > 1 {
buf := make([]byte, nameSize)
written := CapiGetDetectionClassName(r.handle, i, uintptr(unsafe.Pointer(&buf[0])), nameSize)
// `written` is the same number (needed bytes including NUL); strip NUL.
if written > 0 && int(written) <= len(buf) {
className = string(buf[:written-1])
} else {
className = string(buf[:len(buf)-1])
}
}
if className == "" {
className = strconv.Itoa(int(cid))
}
// Two-call sizing for mask PNG (returns 0 when no mask).
var mask []byte
maskSize := CapiGetDetectionMaskPNG(r.handle, i, 0, 0)
if maskSize > 0 {
maskBuf := make([]byte, maskSize)
CapiGetDetectionMaskPNG(r.handle, i, uintptr(unsafe.Pointer(&maskBuf[0])), maskSize)
mask = maskBuf
}
detections = append(detections, &pb.Detection{
X: bbox[0],
Y: bbox[1],
Width: bbox[2] - bbox[0],
Height: bbox[3] - bbox[1],
Confidence: score,
ClassName: className,
Mask: mask,
})
}
return pb.DetectResponse{
Detections: detections,
}, nil
}