Files
zoneminder/src/yolov4_postprocess.cpp
2025-02-27 15:19:26 -08:00

448 lines
14 KiB
C++

#include "config.h"
#if HAVE_QUADRA
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "nierrno.h"
#include "yolo_postprocess.h"
#include "yolo_model.h"
#include "zm_logger.h"
/* Number of floats in g_biases; create_yolov4_model() copies this many values
 * into every layer's private biases array. */
#define BIASES_NUM 12
/* Alternative tables labelled "class" by the original author, kept for
 * reference only (disabled): */
// int g_masks[2][3] = { { 3, 4, 5 }, { 1, 2, 3 } };
// float g_biases[] = { 10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319 };
/* Active tables, labelled "human face" by the original author. */
/* g_masks[i] is copied into output layer i's mask. For component n of that
 * layer, mask[n] is the index of the (width, height) pair in g_biases that
 * get_yolo_box() uses to scale the box size. Only two rows are provided. */
static int g_masks[2][3] = {{3, 4, 5}, {0, 1, 2}};
/* Flat list of (width, height) pairs: pair k is g_biases[2 * k] and
 * g_biases[2 * k + 1]. get_yolo_box() divides them by the network input
 * width/height. */
static float g_biases[] = {10, 16, 25, 37, 49, 71, 85, 118, 143, 190, 274, 283};
/*
 * Flat offset of one value inside a layer's output buffer.
 *
 * The buffer is addressed as consecutive planes of width * height values,
 * with (4 + classes + 1) planes per component.
 *
 * l:        layer description (width, height and classes are read)
 * batch:    unused; kept so the signature stays unchanged
 * location: component * width * height + cell index
 * entry:    plane within the component (0..3 box, 4 objectness, 5.. classes)
 */
static int entry_index(ni_roi_network_layer_t *l, int batch, int location,
                       int entry) {
  const int plane = l->width * l->height;
  const int component = location / plane;
  const int cell = location % plane;
  const int planes_per_component = 4 + l->classes + 1;

  (void)batch;
  return component * plane * planes_per_component + entry * plane + cell;
}
/*
 * Logistic function 1 / (1 + e^-x).
 *
 * The exponential is evaluated in double precision and narrowed to float
 * before the division, which itself is carried out in double.
 */
static float sigmoid(float x) {
  const float decay = (float)exp((double)(-x));
  const double denominator = 1.0 + decay;

  return (float)(1.0 / denominator);
}
/*
 * Decode one raw box prediction into a normalised box.
 *
 * x:       layer output buffer
 * biases:  flat (width, height) pairs; pair n is used
 * n:       index of the bias pair
 * index:   offset of the first box value; the four values are stride apart
 * col/row: grid cell the prediction belongs to
 * lw/lh:   layer width / height
 * nw/nh:   network input width / height
 * stride:  distance between consecutive box values in x
 *
 * The returned x/y are the box's top-left corner (centre minus half the
 * size); only x, y, w and h are written.
 */
static box get_yolo_box(float *x, float *biases, int n, int index, int col,
                        int row, int lw, int lh, int nw, int nh, int stride) {
  const float *raw = x + index;
  const float center_x = ((float)col + sigmoid(raw[0 * stride])) / (float)lw;
  const float center_y = ((float)row + sigmoid(raw[1 * stride])) / (float)lh;
  const float width =
      (float)exp((double)raw[2 * stride]) * biases[2 * n] / (float)nw;
  const float height =
      (float)exp((double)raw[3 * stride]) * biases[2 * n + 1] / (float)nh;
  box out;

  out.w = width;
  out.h = height;
  /* Shift from the centre to the top-left corner. */
  out.x = center_x - (float)(width / 2.0);
  out.y = center_y - (float)(height / 2.0);
  return out;
}
/*
 * Scan one output layer and append every prediction whose best class score
 * (objectness * class probability) reaches thresh to det_cache.
 *
 * l:          layer to scan (output, width, height, component, classes,
 *             mask and biases are read)
 * netw/neth:  network input width / height, forwarded to get_yolo_box()
 * thresh:     minimum score for a detection to be kept
 * det_cache:  growing detection array; dets_num is advanced and the array is
 *             enlarged by 10 entries whenever it is full
 * dets_num:   receives the number of detections added by this call
 *
 * Returns 0 on success or NIERROR(ENOMEM) when the cache cannot grow; in that
 * case the existing cache contents stay valid.
 */
static int get_yolo_detections(ni_roi_network_layer_t *l, int netw,
                               int neth, float thresh,
                               detection_cache *det_cache, int *dets_num) {
  float *predictions = l->output;
  detection *dets = det_cache->dets;
  int added = 0;

  *dets_num = 0;
  Debug(1, "pic %dx%d, comp=%d, class=%d, net %dx%d, thresh=%f\n", l->width,
        l->height, l->component, l->classes, netw, neth, thresh);

  for (int cell = 0; cell < l->width * l->height; ++cell) {
    const int row = cell / l->width;
    const int col = cell % l->width;

    for (int comp = 0; comp < l->component; ++comp) {
      const int location = comp * l->width * l->height + cell;
      const float objectness =
          sigmoid(predictions[entry_index(l, 0, location, 4)]);
      int best_class = -1;
      float best_prob = thresh;

      /* Keep the last class whose score is at least the running maximum. */
      for (int cls = 0; cls < l->classes; cls++) {
        const int class_index = entry_index(l, 0, location, 4 + 1 + cls);
        const double prob = objectness * sigmoid(predictions[class_index]);
        if (prob >= best_prob) {
          best_class = cls;
          best_prob = (float)prob;
        }
      }
      if (best_class < 0)
        continue;

      const int box_index = entry_index(l, 0, location, 0);

      if (det_cache->dets_num >= det_cache->capacity) {
        /* Grow through a temporary so the old array survives a failure. */
        dets = static_cast<detection *>(
            realloc(det_cache->dets,
                    sizeof(detection) * (det_cache->capacity + 10)));
        if (!dets) {
          Error("failed to realloc detections capacity %d",
                det_cache->capacity);
          return NIERROR(ENOMEM);
        }
        det_cache->dets = dets;
        det_cache->capacity += 10;
        if (det_cache->capacity >= 100) {
          Warning("too many detections %d\n", det_cache->dets_num);
        }
      }

      Debug(1, "max_prob %f, class %d\n", best_prob, best_class);

      detection *slot = &dets[det_cache->dets_num];
      slot->max_prob = best_prob;
      slot->prob_class = best_class;
      slot->bbox = get_yolo_box(predictions, l->biases, l->mask[comp],
                                box_index, col, row, l->width, l->height,
                                netw, neth, l->width * l->height);
      slot->objectness = objectness;
      slot->classes = l->classes;
      slot->color = comp;
      Debug(1, "%d, x %f, y %f, w %f, h %f\n",
            det_cache->dets_num, slot->bbox.x, slot->bbox.y,
            slot->bbox.w, slot->bbox.h);

      det_cache->dets_num++;
      added++;
    }
  }

  *dets_num = added;
  return 0;
}
static int nms_comparator(const void *pa, const void *pb) {
detection *a = (detection *)pa;
detection *b = (detection *)pb;
if (a->prob_class > b->prob_class)
return 1;
else if (a->prob_class < b->prob_class)
return -1;
else {
if (a->max_prob < b->max_prob)
return 1;
else if (a->max_prob > b->max_prob)
return -1;
}
return 0;
}
/*
 * Length of the overlap between two 1-D intervals [x1, x1 + w1] and
 * [x2, x2 + w2].
 *
 * x1/x2 are the START of each interval (left or top edge), not the centre:
 * get_yolo_box() stores the top-left corner in box.x/box.y, and
 * resize_coords() reads those fields as left/top. The previous centre-based
 * formula compared boxes shifted by half their own size, which skews the IoU
 * whenever the two boxes differ in size.
 *
 * Returns a negative value (the gap, negated) when the intervals are disjoint.
 */
static float overlap(float x1, float w1, float x2, float w2) {
  const float left = x1 > x2 ? x1 : x2;
  const float end1 = x1 + w1;
  const float end2 = x2 + w2;
  const float right = end1 < end2 ? end1 : end2;

  return right - left;
}
/*
 * Area shared by boxes a and b, or 0 when they do not overlap on either axis.
 * The per-axis extent comes from overlap().
 */
static float box_intersection(box a, box b) {
  const float shared_w = overlap(a.x, a.w, b.x, b.w);
  const float shared_h = overlap(a.y, a.h, b.y, b.h);

  return (shared_w < 0 || shared_h < 0) ? 0 : shared_w * shared_h;
}
/*
 * Area covered by a or b: the sum of both areas minus their intersection.
 */
static float box_union(box a, box b) {
  const float shared = box_intersection(a, b);

  return a.w * a.h + b.w * b.h - shared;
}
/*
 * Intersection over union of two boxes. Returns 0 when either the
 * intersection or the union is zero, which also avoids dividing by zero.
 */
static float box_iou(box a, box b) {
  const float shared = box_intersection(a, b);
  const float combined = box_union(a, b);

  return (shared == 0 || combined == 0) ? 0 : shared / combined;
}
/*
 * Non-maximum suppression over detections already ordered by
 * nms_comparator() (grouped by class, best probability first).
 *
 * For every surviving detection, each later detection of the same class whose
 * IoU with it exceeds nms_thresh is suppressed by setting max_prob to 0.
 * Always returns 0.
 */
static int nms_sort(detection *dets, int dets_num, float nms_thresh) {
  const int last = dets_num - 1;

  for (int keep = 0; keep < last; keep++) {
    /* Skip suppressed entries and entries with no same-class successor. */
    if (dets[keep].max_prob == 0 ||
        dets[keep].prob_class != dets[keep + 1].prob_class)
      continue;

    const int cls = dets[keep].prob_class;
    const box kept_box = dets[keep].bbox;

    for (int cand = keep + 1;
         cand < dets_num && dets[cand].prob_class == cls; cand++) {
      if (dets[cand].max_prob != 0 &&
          box_iou(kept_box, dets[cand].bbox) > nms_thresh)
        dets[cand].max_prob = 0;
    }
  }
  return 0;
}
/*
 * Round a scaled coordinate to the nearest pixel and clamp it to [0, limit].
 * Negative and NaN inputs map to 0, so the float-to-integer conversion is only
 * performed on values that fit.
 */
static unsigned int clamp_to_pixel(double scaled, uint32_t limit) {
  const double rounded = floor(scaled + 0.5);

  if (!(rounded > 0.0))
    return 0;
  if (rounded >= (double)limit)
    return limit;
  return (unsigned int)rounded;
}

/*
 * Convert the surviving detections (max_prob != 0) from normalised box
 * coordinates to pixel rectangles of an img_width x img_height image.
 *
 * dets/dets_num:  detections after non-maximum suppression
 * img_width/img_height: target image size in pixels
 * netw/neth:      unused; kept so the signature stays unchanged
 * roi_box:        receives a malloc()ed array the caller must free(), or NULL
 *                 when nothing survived
 * roi_num:        receives the number of entries in *roi_box
 *
 * Every edge is clamped to the image. A box's top-left corner is its centre
 * minus half its size and can therefore be negative; previously such values
 * were stored in unsigned variables and wrapped around instead of being
 * clamped to 0.
 *
 * Returns 0 on success or NIERROR(ENOMEM) if the array cannot be allocated
 * (the outputs are left untouched in that case).
 */
static int resize_coords(detection *dets, int dets_num,
                         uint32_t img_width, uint32_t img_height,
                         uint32_t netw, uint32_t neth,
                         struct roi_box **roi_box, int *roi_num) {
  struct roi_box *rbox;
  int rbox_num = 0;
  int i;

  (void)netw;
  (void)neth;

  if (dets_num <= 0) {
    *roi_num = 0;
    *roi_box = NULL;
    return 0;
  }

  rbox = static_cast<struct roi_box *>(malloc(sizeof(struct roi_box) * dets_num));
  if (!rbox)
    return NIERROR(ENOMEM);

  for (i = 0; i < dets_num; i++) {
    unsigned int left, right, top, bot;

    Debug(1, "index %d, max_prob %f, class %d\n", i,
          dets[i].max_prob, dets[i].prob_class);
    /* max_prob == 0 marks a detection suppressed by nms_sort(). */
    if (dets[i].max_prob == 0)
      continue;

    top = clamp_to_pixel(dets[i].bbox.y * img_height, img_height);
    left = clamp_to_pixel(dets[i].bbox.x * img_width, img_width);
    right = clamp_to_pixel((dets[i].bbox.x + dets[i].bbox.w) * img_width,
                           img_width);
    bot = clamp_to_pixel((dets[i].bbox.y + dets[i].bbox.h) * img_height,
                         img_height);

    Debug(1, "top %d, left %d, right %d, bottom %d\n", top,
          left, right, bot);

    rbox[rbox_num].left = left;
    rbox[rbox_num].right = right;
    rbox[rbox_num].top = top;
    rbox[rbox_num].bottom = bot;
    rbox[rbox_num].ai_class = dets[i].prob_class;
    rbox[rbox_num].objectness = dets[i].objectness;
    rbox[rbox_num].prob = dets[i].max_prob;
    rbox_num++;
  }

  if (rbox_num == 0) {
    /* Nothing survived: do not hand an empty allocation to the caller. */
    free(rbox);
    rbox = NULL;
  }
  *roi_num = rbox_num;
  *roi_box = rbox;
  return 0;
}
/*
 * Run the whole post-processing chain on the current layer outputs:
 * collect detections from every output layer, sort them by class and
 * probability, apply non-maximum suppression and convert the survivors to
 * pixel rectangles.
 *
 * ctx:        model context (layers, thresholds and detection cache)
 * img_width/img_height: size of the image the boxes are reported for
 * roi_box:    receives a malloc()ed array of boxes, or NULL when none
 * roi_num:    receives the number of boxes
 *
 * Returns 0 on success or the error returned by a failing step.
 */
static int ni_yolov4_get_boxes(YoloModelCtx *ctx, uint32_t img_width,
    uint32_t img_height, struct roi_box **roi_box, int *roi_num)
{
  detection_cache *cache = &ctx->det_cache;
  int layer_dets = 0;
  int status;
  int idx;

  *roi_box = NULL;
  *roi_num = 0;
  cache->dets_num = 0;  /* the cache is reused between calls */

  for (idx = 0; idx < ctx->output_number; idx++) {
    status = get_yolo_detections(&ctx->layers[idx], ctx->input_width,
        ctx->input_height, ctx->obj_thresh, cache, &layer_dets);
    if (status != 0) {
      Error("failed to get yolo detection at layer %d", idx);
      return status;
    }
    Debug(1, "layer %d, yolo detections: %d", idx, layer_dets);
  }

  if (cache->dets_num == 0) {
    return 0;
  }

  detection *all = cache->dets;
  const int total = cache->dets_num;

  for (idx = 0; idx < total; idx++) {
    Debug(1, "orig dets %d: x %f,y %f,w %f,h %f,c %d,p %f", idx,
        all[idx].bbox.x, all[idx].bbox.y, all[idx].bbox.w, all[idx].bbox.h,
        all[idx].prob_class, all[idx].max_prob);
  }

  qsort(all, total, sizeof(detection), nms_comparator);

  for (idx = 0; idx < total; idx++) {
    Debug(1, "sorted dets %d: x %f,y %f,w %f,h %f,c %d,p %f", idx,
        all[idx].bbox.x, all[idx].bbox.y, all[idx].bbox.w, all[idx].bbox.h,
        all[idx].prob_class, all[idx].max_prob);
  }

  nms_sort(all, total, ctx->nms_thresh);

  status = resize_coords(all, total, img_width, img_height,
      ctx->input_width, ctx->input_height, roi_box, roi_num);
  if (status != 0) {
    Error("cannot resize coordinates");
  }
  return status;
}
static int create_yolov4_model(
YoloModelCtx *ctx,
ni_network_data_t *network_data,
float obj_thresh,
float nms_thresh,
unsigned int ctx_width,
unsigned int ctx_height)
{
unsigned int i;
int ret = 0;
ctx->obj_thresh = obj_thresh;
ctx->nms_thresh = nms_thresh;
Debug(1, "Creating yolov4 model %ux%u %p", ctx_width, ctx_height, network_data);
if (ctx_width != network_data->linfo.in_param[0].sizes[0] ||
ctx_height != network_data->linfo.in_param[0].sizes[1]) {
Error("input dimensions not match: expect %dx%d, actual %dx%d",
ctx_width, ctx_height,
network_data->linfo.in_param[0].sizes[0],
network_data->linfo.in_param[0].sizes[1]);
return NIERROR(EINVAL);
}
ctx->input_width = network_data->linfo.in_param[0].sizes[0];
ctx->input_height = network_data->linfo.in_param[0].sizes[1];
ctx->output_number = network_data->output_num;
ctx->out_tensor = (uint8_t **)calloc(network_data->output_num,
sizeof(uint8_t **));
if (ctx->out_tensor == NULL) {
Error("failed to allocate output tensor bufptr");
return NIERROR(ENOMEM);
}
for (i = 0; i < network_data->output_num; i++) {
ni_network_layer_params_t *p_param = &network_data->linfo.out_param[i];
ctx->out_tensor[i] =
(uint8_t *)malloc(ni_ai_network_layer_dims(p_param) * sizeof(float));
if (ctx->out_tensor[i] == NULL) {
Error( "failed to allocate output tensor buffer");
return NIERROR(ENOMEM);
}
}
ctx->layers =static_cast<ni_roi_network_layer_t *>(
malloc(sizeof(ni_roi_network_layer_t) * network_data->output_num));
if (!ctx->layers) {
fprintf(stderr, "cannot allocate network layer memory\n");
return NIERROR(ENOMEM);
}
for (i = 0; i < network_data->output_num; i++) {
ctx->layers[i].width = network_data->linfo.out_param[i].sizes[0];
ctx->layers[i].height = network_data->linfo.out_param[i].sizes[1];
ctx->layers[i].channel = network_data->linfo.out_param[i].sizes[2];
ctx->layers[i].component = 3;
ctx->layers[i].classes =
(ctx->layers[i].channel / ctx->layers[i].component) -
(4 + 1);
ctx->layers[i].output_number =
ni_ai_network_layer_dims(&network_data->linfo.out_param[i]);
ctx->layers[i].output = (float *)ctx->out_tensor[i];
memcpy(ctx->layers[i].mask, &g_masks[i][0], sizeof(ctx->layers[i].mask));
ctx->layers[i].biases = (float *)malloc(BIASES_NUM * sizeof(float));
if (! ctx->layers[i].biases) {
Error("cannot allocate network layer memory");
return NIERROR(ENOMEM);
}
memcpy(ctx->layers[i].biases, &g_biases[0], BIASES_NUM * sizeof(float));
Debug(1, "network layer %d: w %d, h %d, ch %d, co %d, cl %d\n", i,
ctx->layers[i].width, ctx->layers[i].height,
ctx->layers[i].channel, ctx->layers[i].component,
ctx->layers[i].classes);
}
ctx->det_cache.dets_num = 0;
ctx->det_cache.capacity = 20;
ctx->det_cache.dets = static_cast<detection *>(malloc(sizeof(detection) * ctx->det_cache.capacity));
if (!ctx->det_cache.dets) {
Error("failed to allocate detection cache");
return NIERROR(ENOMEM);
}
return ret;
}
/*
 * Free every buffer owned by ctx (per-layer biases, layer array, output
 * tensors and the detection cache) and reset the pointers.
 *
 * The layer array and the tensor array are released independently, so a
 * context in which only one of them was allocated is handled without
 * dereferencing a NULL pointer. All pointers are cleared afterwards, which
 * makes a second call a no-op instead of a double free.
 */
static void destroy_yolov4_model(YoloModelCtx *ctx)
{
  int i;

  if (!ctx)
    return;

  if (ctx->layers) {
    for (i = 0; i < ctx->output_number; i++) {
      free(ctx->layers[i].biases);
      ctx->layers[i].biases = NULL;
    }
    free(ctx->layers);
    ctx->layers = NULL;
  }

  if (ctx->out_tensor) {
    for (i = 0; i < ctx->output_number; i++) {
      free(ctx->out_tensor[i]);
      ctx->out_tensor[i] = NULL;
    }
    free(ctx->out_tensor);
    ctx->out_tensor = NULL;
  }

  free(ctx->det_cache.dets);
  ctx->det_cache.dets = NULL;
  ctx->det_cache.dets_num = 0;
  ctx->det_cache.capacity = 0;
}
/*
 * YOLOv4 entry for the YoloModel interface: binds the create, destroy and
 * box-extraction callbacks to the static implementations in this file. This
 * is the only symbol of the file with external linkage.
 */
YoloModel yolov4 = {
.create_model = create_yolov4_model,
.destroy_model = destroy_yolov4_model,
.ni_get_boxes = ni_yolov4_get_boxes,
};
#endif