feat(rfdetr): add object detection API (#5923)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-05-24 08:38:02 -04:00 · 2025-07-27 22:02:51 +02:00
parent 73ecb7f90b
commit 949e5b9be8
34 changed files with 884 additions and 7 deletions
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -20,6 +20,7 @@ service Backend {
  rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
  rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
  rpc Status(HealthMessage) returns (StatusResponse) {}
+  rpc Detect(DetectOptions) returns (DetectResponse) {}

  rpc StoresSet(StoresSetOptions) returns (Result) {}
  rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
@@ -376,3 +377,20 @@ message Message {
  string role = 1;
  string content = 2;
 }
+
+message DetectOptions {
+  string src = 1;
+}
+
+message Detection {
+  float x = 1;
+  float y = 2;
+  float width = 3;
+  float height = 4;
+  float confidence = 5;
+  string class_name = 6;
+}
+
+message DetectResponse {
+  repeated Detection Detections = 1;
+}
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -73,6 +73,28 @@
    nvidia-l4t: "nvidia-l4t-arm64-stablediffusion-ggml"
    # metal: "metal-stablediffusion-ggml"
    # darwin-x86: "darwin-x86-stablediffusion-ggml"
+- &rfdetr
+  name: "rfdetr"
+  alias: "rfdetr"
+  license: apache-2.0
+  icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4
+  description: |
+    RF-DETR is a real-time, transformer-based object detection model architecture developed by Roboflow and released under the Apache 2.0 license.
+    RF-DETR is the first real-time model to exceed 60 AP on the Microsoft COCO benchmark alongside competitive performance at base sizes. It also achieves state-of-the-art performance on RF100-VL, an object detection benchmark that measures model domain adaptability to real world problems. RF-DETR is the fastest and most accurate model for its size when compared to current real-time object detection models.
+    RF-DETR is small enough to run on the edge using Inference, making it an ideal model for deployments that need both strong accuracy and real-time performance.
+  urls:
+    - https://github.com/roboflow/rf-detr
+  tags:
+    - object-detection
+    - rfdetr
+    - gpu
+    - cpu
+  capabilities:
+    nvidia: "cuda12-rfdetr"
+    intel: "intel-rfdetr"
+    #amd: "rocm-rfdetr"
+    nvidia-l4t: "nvidia-l4t-arm64-rfdetr"
+    default: "cpu-rfdetr"
 - &vllm
  name: "vllm"
  license: apache-2.0
@@ -663,6 +685,65 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-vllm"
  mirrors:
    - localai/localai-backends:master-gpu-intel-sycl-f16-vllm
+# rfdetr
+- !!merge <<: *rfdetr
+  name: "rfdetr-development"
+  capabilities:
+    nvidia: "cuda12-rfdetr-development"
+    intel: "intel-rfdetr-development"
+    #amd: "rocm-rfdetr-development"
+    nvidia-l4t: "nvidia-l4t-arm64-rfdetr-development"
+    default: "cpu-rfdetr-development"
+- !!merge <<: *rfdetr
+  name: "cuda12-rfdetr"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rfdetr"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-cuda-12-rfdetr
+- !!merge <<: *rfdetr
+  name: "intel-rfdetr"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-rfdetr"
+  mirrors:
+    - localai/localai-backends:latest-gpu-intel-rfdetr
+# - !!merge <<: *rfdetr
+#   name: "rocm-rfdetr"
+#   uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-hipblas-rfdetr"
+#   mirrors:
+#     - localai/localai-backends:latest-gpu-hipblas-rfdetr
+- !!merge <<: *rfdetr
+  name: "nvidia-l4t-arm64-rfdetr"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-rfdetr"
+  mirrors:
+    - localai/localai-backends:latest-nvidia-l4t-arm64-rfdetr
+- !!merge <<: *rfdetr
+  name: "cpu-rfdetr"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-rfdetr"
+  mirrors:
+    - localai/localai-backends:latest-cpu-rfdetr
+- !!merge <<: *rfdetr
+  name: "cuda12-rfdetr-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-rfdetr"
+  mirrors:
+    - localai/localai-backends:master-gpu-nvidia-cuda-12-rfdetr
+- !!merge <<: *rfdetr
+  name: "intel-rfdetr-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-rfdetr"
+  mirrors:
+    - localai/localai-backends:master-gpu-intel-rfdetr
+# - !!merge <<: *rfdetr
+#   name: "rocm-rfdetr-development"
+#   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-hipblas-rfdetr"
+#   mirrors:
+#     - localai/localai-backends:master-gpu-hipblas-rfdetr
+- !!merge <<: *rfdetr
+  name: "cpu-rfdetr-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-cpu-rfdetr"
+  mirrors:
+    - localai/localai-backends:master-cpu-rfdetr
+- !!merge <<: *rfdetr
+  name: "nvidia-l4t-arm64-rfdetr-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-rfdetr"
+  mirrors:
+    - localai/localai-backends:master-nvidia-l4t-arm64-rfdetr
 ## Rerankers
 - !!merge <<: *rerankers
  name: "rerankers-development"
--- a/backend/python/common/template/protogen.sh
+++ b/backend/python/common/template/protogen.sh
@@ -8,4 +8,6 @@ else
    source $backend_dir/../common/libbackend.sh
 fi

+ensureVenv
+
 python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
--- a/backend/python/rfdetr/Makefile
+++ b/backend/python/rfdetr/Makefile
@@ -0,0 +1,20 @@
+.DEFAULT_GOAL := install
+
+.PHONY: install
+install:
+	bash install.sh
+	$(MAKE) protogen
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+	$(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+	bash protogen.sh
+
+.PHONY: clean
+clean: protogen-clean
+	rm -rf venv __pycache__
--- a/backend/python/rfdetr/backend.py
+++ b/backend/python/rfdetr/backend.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+"""
+gRPC server for RFDETR object detection models.
+"""
+from concurrent import futures
+
+import argparse
+import signal
+import sys
+import os
+import time
+import base64
+import backend_pb2
+import backend_pb2_grpc
+import grpc
+
+import requests
+
+import supervision as sv
+from inference import get_model
+from PIL import Image
+from io import BytesIO
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+
+# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+
+# Implement the BackendServicer class with the service methods
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+    """
+    A gRPC servicer for the RFDETR backend service.
+
+    This class implements the gRPC methods for object detection using RFDETR models.
+    """
+    
+    def __init__(self):
+        self.model = None
+        self.model_name = None
+        
+    def Health(self, request, context):
+        """
+        A gRPC method that returns the health status of the backend service.
+
+        Args:
+            request: A HealthMessage object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Reply object that contains the health status of the backend service.
+        """
+        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+
+    def LoadModel(self, request, context):
+        """
+        A gRPC method that loads a RFDETR model into memory.
+
+        Args:
+            request: A ModelOptions object that contains the model parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Result object that contains the result of the LoadModel operation.
+        """
+        model_name = request.Model
+        try:
+            # Load the RFDETR model
+            self.model = get_model(model_name)
+            self.model_name = model_name
+            print(f'Loaded RFDETR model: {model_name}')
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Failed to load model: {err}")
+
+        return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+    def Detect(self, request, context):
+        """
+        A gRPC method that performs object detection on an image.
+
+        Args:
+            request: A DetectOptions object that contains the image source.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A DetectResponse object that contains the detection results.
+        """
+        if self.model is None:
+            print(f"Model is None")
+            return backend_pb2.DetectResponse()
+        print(f"Model is not None")
+        try:
+            print(f"Decoding image")
+            # Decode the base64 image
+            print(f"Image data: {request.src}")
+
+            image_data = base64.b64decode(request.src)
+            image = Image.open(BytesIO(image_data))
+            
+            # Perform inference
+            predictions = self.model.infer(image, confidence=0.5)[0]
+          
+            # Convert to proto format
+            proto_detections = []
+            for i in range(len(predictions.predictions)):
+                pred = predictions.predictions[i]
+                print(f"Prediction: {pred}")
+                proto_detection = backend_pb2.Detection(
+                    x=float(pred.x),
+                    y=float(pred.y),
+                    width=float(pred.width),
+                    height=float(pred.height),
+                    confidence=float(pred.confidence),
+                    class_name=pred.class_name
+                )
+                proto_detections.append(proto_detection)
+            
+            return backend_pb2.DetectResponse(Detections=proto_detections)
+        except Exception as err:
+            print(f"Detection error: {err}")
+            return backend_pb2.DetectResponse()
+
+    def Status(self, request, context):
+        """
+        A gRPC method that returns the status of the backend service.
+
+        Args:
+            request: A HealthMessage object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A StatusResponse object that contains the status information.
+        """
+        state = backend_pb2.StatusResponse.READY if self.model is not None else backend_pb2.StatusResponse.UNINITIALIZED
+        return backend_pb2.StatusResponse(state=state)
+
+def serve(address):
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
+        options=[
+            ('grpc.max_message_length', 50 * 1024 * 1024),  # 50MB
+            ('grpc.max_send_message_length', 50 * 1024 * 1024),  # 50MB
+            ('grpc.max_receive_message_length', 50 * 1024 * 1024),  # 50MB
+        ])
+    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
+    server.add_insecure_port(address)
+    server.start()
+    print("[RFDETR] Server started. Listening on: " + address, file=sys.stderr)
+
+    # Define the signal handler function
+    def signal_handler(sig, frame):
+        print("[RFDETR] Received termination signal. Shutting down...")
+        server.stop(0)
+        sys.exit(0)
+
+    # Set the signal handlers for SIGINT and SIGTERM
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    try:
+        while True:
+            time.sleep(_ONE_DAY_IN_SECONDS)
+    except KeyboardInterrupt:
+        server.stop(0)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run the RFDETR gRPC server.")
+    parser.add_argument(
+        "--addr", default="localhost:50051", help="The address to bind the server to."
+    )
+    args = parser.parse_args()
+    print(f"[RFDETR] startup: {args}", file=sys.stderr)
+    serve(args.addr)
+
+
+
--- a/backend/python/rfdetr/install.sh
+++ b/backend/python/rfdetr/install.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+set -e
+
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi
+
+# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
+# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
+# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
+# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
+if [ "x${BUILD_PROFILE}" == "xintel" ]; then
+    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
+fi
+
+installRequirements
--- a/backend/python/rfdetr/protogen.sh
+++ b/backend/python/rfdetr/protogen.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -e
+
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi
+
+ensureVenv
+
+python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
--- a/backend/python/rfdetr/requirements-cpu.txt
+++ b/backend/python/rfdetr/requirements-cpu.txt
@@ -0,0 +1,7 @@
+rfdetr
+opencv-python
+accelerate
+peft
+inference
+torch==2.7.1
+optimum-quanto
--- a/backend/python/rfdetr/requirements-cublas11.txt
+++ b/backend/python/rfdetr/requirements-cublas11.txt
@@ -0,0 +1,8 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.7.1+cu118
+rfdetr
+opencv-python
+accelerate
+inference
+peft
+optimum-quanto
--- a/backend/python/rfdetr/requirements-cublas12.txt
+++ b/backend/python/rfdetr/requirements-cublas12.txt
@@ -0,0 +1,7 @@
+torch==2.7.1
+rfdetr
+opencv-python
+accelerate
+inference
+peft
+optimum-quanto
--- a/backend/python/rfdetr/requirements-hipblas.txt
+++ b/backend/python/rfdetr/requirements-hipblas.txt
@@ -0,0 +1,9 @@
+--extra-index-url https://download.pytorch.org/whl/rocm6.3
+torch==2.7.1+rocm6.3
+torchvision==0.22.1+rocm6.3
+rfdetr
+opencv-python
+accelerate
+inference
+peft
+optimum-quanto
--- a/backend/python/rfdetr/requirements-intel.txt
+++ b/backend/python/rfdetr/requirements-intel.txt
@@ -0,0 +1,13 @@
+--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+intel-extension-for-pytorch==2.3.110+xpu
+torch==2.3.1+cxx11.abi
+torchvision==0.18.1+cxx11.abi
+oneccl_bind_pt==2.3.100+xpu
+optimum[openvino]
+setuptools
+rfdetr
+inference
+opencv-python
+accelerate
+peft
+optimum-quanto
--- a/backend/python/rfdetr/requirements.txt
+++ b/backend/python/rfdetr/requirements.txt
@@ -0,0 +1,3 @@
+grpcio==1.71.0
+protobuf
+grpcio-tools
--- a/backend/python/rfdetr/run.sh
+++ b/backend/python/rfdetr/run.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+backend_dir=$(dirname "$0")
+if [ -d "$backend_dir/common" ]; then
+    source "$backend_dir/common/libbackend.sh"
+else
+    source "$backend_dir/../common/libbackend.sh"
+fi
+# Quote "$@" so arguments containing spaces or globs are forwarded unchanged
+startBackend "$@"
--- a/backend/python/rfdetr/test.sh
+++ b/backend/python/rfdetr/test.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -e
+
+backend_dir=$(dirname $0)
+if [ -d $backend_dir/common ]; then
+    source $backend_dir/common/libbackend.sh
+else
+    source $backend_dir/../common/libbackend.sh
+fi
+
+runUnittests