mirror of
https://github.com/ZoneMinder/zoneminder.git
synced 2026-05-30 17:35:37 -04:00
448 lines
14 KiB
C++
448 lines
14 KiB
C++
#include "config.h"
|
|
|
|
#if HAVE_QUADRA
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <math.h>
|
|
#include "nierrno.h"
|
|
#include "yolo_postprocess.h"
|
|
#include "yolo_model.h"
|
|
#include "zm_logger.h"
|
|
|
|
#define BIASES_NUM 12
|
|
|
|
/*
 * Anchor tables copied into every layer by create_yolov4_model.
 *
 * g_masks[i] lists the anchor numbers used by output layer i.
 * g_biases holds the anchor sizes as (width, height) pairs, indexed by
 * anchor number (get_yolo_box reads biases[2 * n] and biases[2 * n + 1]).
 *
 * The active values are the set labelled "human face" in the original
 * source. The alternative set labelled "class" is:
 *   masks:  { { 3, 4, 5 }, { 1, 2, 3 } }
 *   biases: { 10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319 }
 *
 * Both tables are only ever read, hence const.
 */
static const int g_masks[2][3] = {{3, 4, 5}, {0, 1, 2}};
static const float g_biases[] = {10, 16, 25, 37, 49, 71, 85, 118, 143, 190, 274, 283};
|
|
|
|
/*
 * Compute the flat offset of one entry inside a layer's output buffer.
 *
 * The buffer is addressed as consecutive anchors; every anchor owns
 * (4 + classes + 1) entries and every entry is a width * height plane.
 *
 * l:        layer whose width, height and class count define the layout
 * batch:    not used by the computation
 * location: anchor * (width * height) + cell index inside the plane
 * entry:    entry number inside the anchor (callers in this file pass 0 for
 *           the box, 4 for objectness and 5 + k for class k)
 */
static int entry_index(ni_roi_network_layer_t *l, int batch, int location,
                       int entry) {
  const int plane = l->width * l->height;
  const int anchor = location / plane;
  const int cell = location % plane;
  const int entries_per_anchor = 4 + l->classes + 1;

  return anchor * plane * entries_per_anchor + entry * plane + cell;
}
|
|
|
|
/* Logistic function 1 / (1 + e^-x), returned as float. */
static float sigmoid(float x) {
  /* e^-x is evaluated in double and narrowed to float before the sum. */
  const float decay = (float)exp((double)(-x));
  const double denom = 1.0 + decay;

  return (float)(1.0 / denom);
}
|
|
|
|
/*
|
|
* nw: network input width
|
|
* nh: network input height
|
|
* lw: layer width
|
|
* lh: layer height
|
|
*/
|
|
static box get_yolo_box(float *x, float *biases, int n, int index, int col,
|
|
int row, int lw, int lh, int nw, int nh, int stride) {
|
|
box b;
|
|
|
|
b.x = (float)((float)col + sigmoid(x[index + 0 * stride])) / (float)lw;
|
|
b.y = (float)((float)row + sigmoid(x[index + 1 * stride])) / (float)lh;
|
|
b.w = (float)exp((double)x[index + 2 * stride]) * biases[2 * n] / (float)nw;
|
|
b.h = (float)exp((double)x[index + 3 * stride]) * biases[2 * n + 1] /
|
|
(float)nh;
|
|
|
|
b.x -= (float)(b.w / 2.0);
|
|
b.y -= (float)(b.h / 2.0);
|
|
|
|
return b;
|
|
}
|
|
|
|
/*
 * Scan one output layer and append every prediction that reaches the
 * threshold to the detection cache.
 *
 * l:          layer to scan (dimensions, raw output, masks and biases)
 * netw, neth: network input width / height, forwarded to get_yolo_box
 * thresh:     minimum (objectness * class score) a class has to reach
 * det_cache:  detection array; grown in steps of 10 entries when full
 * dets_num:   receives the number of detections appended by this call
 *
 * Returns 0 on success, or NIERROR(ENOMEM) when the cache cannot be grown;
 * in that case *dets_num is left at 0.
 */
static int get_yolo_detections(ni_roi_network_layer_t *l, int netw,
                               int neth, float thresh,
                               detection_cache *det_cache, int *dets_num) {
  float *raw = l->output;
  detection *dets = det_cache->dets;
  int appended = 0;

  *dets_num = 0;

  Debug(1, "pic %dx%d, comp=%d, class=%d, net %dx%d, thresh=%f\n", l->width,
        l->height, l->component, l->classes, netw, neth, thresh);

  for (int cell = 0; cell < l->width * l->height; ++cell) {
    const int row = cell / l->width;
    const int col = cell % l->width;

    for (int anchor = 0; anchor < l->component; ++anchor) {
      const int location = anchor * l->width * l->height + cell;
      const float objectness = sigmoid(raw[entry_index(l, 0, location, 4)]);

      /* Pick the best class whose score reaches the threshold, if any. */
      int best_class = -1;
      float best_prob = thresh;
      for (int k = 0; k < l->classes; k++) {
        const int class_index = entry_index(l, 0, location, 4 + 1 + k);
        const double prob = objectness * sigmoid(raw[class_index]);
        if (prob >= best_prob) {
          best_class = k;
          best_prob = (float)prob;
        }
      }

      if (best_class < 0)
        continue;

      const int box_index = entry_index(l, 0, location, 0);

      /* Make room for one more detection. */
      if (det_cache->dets_num >= det_cache->capacity) {
        detection *grown = static_cast<detection *>(
            realloc(det_cache->dets,
                    sizeof(detection) * (det_cache->capacity + 10)));
        if (!grown) {
          Error("failed to realloc detections capacity %d",
                det_cache->capacity);
          return NIERROR(ENOMEM);
        }
        dets = grown;
        det_cache->dets = grown;
        det_cache->capacity += 10;
        if (det_cache->capacity >= 100) {
          Warning("too many detections %d\n", det_cache->dets_num);
        }
      }

      Debug(1, "max_prob %f, class %d\n", best_prob, best_class);

      detection *slot = &dets[det_cache->dets_num];
      slot->max_prob = best_prob;
      slot->prob_class = best_class;
      slot->bbox = get_yolo_box(raw, l->biases, l->mask[anchor],
                                box_index, col, row, l->width, l->height,
                                netw, neth, l->width * l->height);
      slot->objectness = objectness;
      slot->classes = l->classes;
      slot->color = anchor;

      Debug(1, "%d, x %f, y %f, w %f, h %f\n",
            det_cache->dets_num, slot->bbox.x,
            slot->bbox.y,
            slot->bbox.w,
            slot->bbox.h);

      det_cache->dets_num++;
      appended++;
    }
  }

  *dets_num = appended;
  return 0;
}
|
|
|
|
static int nms_comparator(const void *pa, const void *pb) {
|
|
detection *a = (detection *)pa;
|
|
detection *b = (detection *)pb;
|
|
|
|
if (a->prob_class > b->prob_class)
|
|
return 1;
|
|
else if (a->prob_class < b->prob_class)
|
|
return -1;
|
|
else {
|
|
if (a->max_prob < b->max_prob)
|
|
return 1;
|
|
else if (a->max_prob > b->max_prob)
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Length shared by two 1-D spans, each given by its centre and width.
 * A negative result means the spans do not touch.
 */
static float overlap(float x1, float w1, float x2, float w2) {
  const float lo1 = x1 - w1 / 2;
  const float lo2 = x2 - w2 / 2;
  const float hi1 = x1 + w1 / 2;
  const float hi2 = x2 + w2 / 2;

  /* The shared span starts at the larger low edge ... */
  float left = lo2;
  if (lo1 > lo2)
    left = lo1;

  /* ... and ends at the smaller high edge. */
  float right = hi2;
  if (hi1 < hi2)
    right = hi1;

  return right - left;
}
|
|
|
|
/*
 * Area shared by two boxes, or 0 when they do not overlap.
 *
 * Boxes in this file store their top-left corner in x/y: get_yolo_box shifts
 * the decoded centre by half the size, and resize_coords uses x as the left
 * edge and x + w as the right edge. overlap() however expects span centres,
 * so the centres are rebuilt here. Passing the corners straight through
 * shifts each box by half of its own size, which gives a wrong intersection
 * whenever the two boxes differ in size.
 */
static float box_intersection(box a, box b) {
  float w = overlap(a.x + a.w / 2, a.w, b.x + b.w / 2, b.w);
  float h = overlap(a.y + a.h / 2, a.h, b.y + b.h / 2, b.h);

  /* A negative extent on either axis means the boxes are disjoint. */
  if (w < 0 || h < 0)
    return 0;

  return w * h;
}
|
|
|
|
/* Area covered by either box: both areas minus the part counted twice. */
static float box_union(box a, box b) {
  const float shared = box_intersection(a, b);

  return a.w * a.h + b.w * b.h - shared;
}
|
|
|
|
/*
 * Intersection over union of two boxes.
 * Returns 0 when either the intersection or the union is 0, which also
 * avoids dividing by zero.
 */
static float box_iou(box a, box b) {
  const float shared = box_intersection(a, b);
  const float total = box_union(a, b);

  if (shared == 0 || total == 0) {
    return 0;
  }
  return shared / total;
}
|
|
|
|
/*
 * Non-maximum suppression over an array of detections.
 *
 * For every detection still alive, later detections of the same class whose
 * IoU with it exceeds nms_thresh are suppressed by setting max_prob to 0.
 * The scan of "later detections" stops at the first entry of another class,
 * so entries of one class are expected to be contiguous (the caller in this
 * file sorts with nms_comparator first).
 *
 * Always returns 0.
 */
static int nms_sort(detection *dets, int dets_num, float nms_thresh) {
  for (int keep = 0; keep < (dets_num - 1); keep++) {
    const int cls = dets[keep].prob_class;

    /* Skip suppressed entries and entries that are last of their class. */
    if (dets[keep].max_prob == 0 || cls != dets[keep + 1].prob_class)
      continue;

    const box kept_box = dets[keep].bbox;
    for (int other = keep + 1;
         other < dets_num && dets[other].prob_class == cls; other++) {
      if (dets[other].max_prob == 0)
        continue;

      if (box_iou(kept_box, dets[other].bbox) > nms_thresh)
        dets[other].max_prob = 0;
    }
  }

  return 0;
}
|
|
|
|
/*
 * Round a scaled coordinate to the nearest pixel and clamp it to [0, limit].
 *
 * The clamp is applied to the double value, so negative, oversized, infinite
 * or NaN inputs never reach the integer conversion.
 */
static unsigned int clamp_to_image(double scaled, uint32_t limit) {
  const double rounded = floor(scaled + 0.5);

  if (!(rounded > 0.0))  /* also catches NaN */
    return 0;
  if (rounded >= (double)limit)
    return limit;
  return (unsigned int)rounded;
}

/*
 * Convert the surviving detections into pixel boxes of the source image.
 *
 * dets, dets_num:     detections; entries with max_prob == 0 are skipped
 * img_width/height:   size of the image the boxes are scaled to
 * netw, neth:         not used
 * roi_box:            receives a malloc'ed array of boxes that the caller
 *                     must free, or NULL when no box is produced
 * roi_num:            receives the number of boxes in *roi_box
 *
 * bbox.x/y are taken as the normalised top-left corner and bbox.w/h as the
 * normalised size. All four edges are clamped to the image: a box that
 * starts left of or above the image has a negative corner, which would
 * otherwise wrap around when stored in an unsigned value.
 *
 * Returns 0 on success or NIERROR(ENOMEM) when the array cannot be allocated.
 */
static int resize_coords(detection *dets, int dets_num,
                         uint32_t img_width, uint32_t img_height,
                         uint32_t netw, uint32_t neth,
                         struct roi_box **roi_box, int *roi_num) {
  struct roi_box *rbox;
  int rbox_num = 0;

  (void)netw;
  (void)neth;

  /* Give the outputs a defined value on every return path. */
  *roi_box = NULL;
  *roi_num = 0;

  if (dets_num <= 0) {
    return 0;
  }

  rbox = static_cast<struct roi_box *>(malloc(sizeof(struct roi_box) * dets_num));
  if (!rbox)
    return NIERROR(ENOMEM);

  for (int i = 0; i < dets_num; i++) {
    Debug(1, "index %d, max_prob %f, class %d\n", i,
          dets[i].max_prob, dets[i].prob_class);
    if (dets[i].max_prob == 0)
      continue;

    const unsigned int top =
        clamp_to_image(dets[i].bbox.y * img_height, img_height);
    const unsigned int left =
        clamp_to_image(dets[i].bbox.x * img_width, img_width);
    const unsigned int right =
        clamp_to_image((dets[i].bbox.x + dets[i].bbox.w) * img_width, img_width);
    const unsigned int bot =
        clamp_to_image((dets[i].bbox.y + dets[i].bbox.h) * img_height, img_height);

    Debug(1, "top %d, left %d, right %d, bottom %d\n", top,
          left, right, bot);

    rbox[rbox_num].left = left;
    rbox[rbox_num].right = right;
    rbox[rbox_num].top = top;
    rbox[rbox_num].bottom = bot;
    rbox[rbox_num].ai_class = dets[i].prob_class;
    rbox[rbox_num].objectness = dets[i].objectness;
    rbox[rbox_num].prob = dets[i].max_prob;
    rbox_num++;
  }

  if (rbox_num == 0) {
    /* Every detection was suppressed: nothing to hand over. */
    free(rbox);
    return 0;
  }

  *roi_num = rbox_num;
  *roi_box = rbox;
  return 0;
}
|
|
|
|
/*
 * Turn the output tensors currently held by the model into image boxes.
 *
 * Steps: collect detections from every output layer into the context's
 * detection cache, sort them with nms_comparator, run nms_sort with the
 * context's NMS threshold, then scale the survivors with resize_coords.
 *
 * ctx:                  model context (layers, thresholds, detection cache)
 * img_width/height:     size of the image the boxes are scaled to
 * roi_box, roi_num:     receive the resulting boxes and their count; they
 *                       are set to NULL / 0 first and stay that way when
 *                       nothing is detected
 *
 * Returns 0 on success or the error code of the failing step.
 */
static int ni_yolov4_get_boxes(YoloModelCtx *ctx, uint32_t img_width,
                               uint32_t img_height, struct roi_box **roi_box, int *roi_num)
{
  detection_cache *cache = &ctx->det_cache;
  int layer_dets = 0;

  *roi_box = NULL;
  *roi_num = 0;

  /* The cache is reused between calls; restart it from empty. */
  cache->dets_num = 0;

  for (int layer = 0; layer < ctx->output_number; layer++) {
    const int rc = get_yolo_detections(&ctx->layers[layer], ctx->input_width,
                                       ctx->input_height, ctx->obj_thresh,
                                       cache, &layer_dets);
    if (rc != 0) {
      Error("failed to get yolo detection at layer %d", layer);
      return rc;
    }
    Debug(1, "layer %d, yolo detections: %d", layer, layer_dets);
  }

  if (cache->dets_num == 0) {
    return 0;
  }

  detection *dets = cache->dets;
  const int total = cache->dets_num;

  for (int i = 0; i < total; i++) {
    Debug(1, "orig dets %d: x %f,y %f,w %f,h %f,c %d,p %f", i,
          dets[i].bbox.x, dets[i].bbox.y, dets[i].bbox.w, dets[i].bbox.h,
          dets[i].prob_class, dets[i].max_prob);
  }

  qsort(dets, total, sizeof(detection), nms_comparator);

  for (int i = 0; i < total; i++) {
    Debug(1, "sorted dets %d: x %f,y %f,w %f,h %f,c %d,p %f", i,
          dets[i].bbox.x, dets[i].bbox.y, dets[i].bbox.w, dets[i].bbox.h,
          dets[i].prob_class, dets[i].max_prob);
  }

  nms_sort(dets, total, ctx->nms_thresh);

  const int rc = resize_coords(dets, total, img_width, img_height,
                               ctx->input_width, ctx->input_height,
                               roi_box, roi_num);
  if (rc != 0) {
    Error("cannot resize coordinates");
    return rc;
  }

  return 0;
}
|
|
|
|
static int create_yolov4_model(
|
|
YoloModelCtx *ctx,
|
|
ni_network_data_t *network_data,
|
|
float obj_thresh,
|
|
float nms_thresh,
|
|
unsigned int ctx_width,
|
|
unsigned int ctx_height)
|
|
{
|
|
unsigned int i;
|
|
int ret = 0;
|
|
|
|
ctx->obj_thresh = obj_thresh;
|
|
ctx->nms_thresh = nms_thresh;
|
|
|
|
Debug(1, "Creating yolov4 model %ux%u %p", ctx_width, ctx_height, network_data);
|
|
if (ctx_width != network_data->linfo.in_param[0].sizes[0] ||
|
|
ctx_height != network_data->linfo.in_param[0].sizes[1]) {
|
|
Error("input dimensions not match: expect %dx%d, actual %dx%d",
|
|
ctx_width, ctx_height,
|
|
network_data->linfo.in_param[0].sizes[0],
|
|
network_data->linfo.in_param[0].sizes[1]);
|
|
return NIERROR(EINVAL);
|
|
}
|
|
|
|
ctx->input_width = network_data->linfo.in_param[0].sizes[0];
|
|
ctx->input_height = network_data->linfo.in_param[0].sizes[1];
|
|
|
|
ctx->output_number = network_data->output_num;
|
|
ctx->out_tensor = (uint8_t **)calloc(network_data->output_num,
|
|
sizeof(uint8_t **));
|
|
if (ctx->out_tensor == NULL) {
|
|
Error("failed to allocate output tensor bufptr");
|
|
return NIERROR(ENOMEM);
|
|
}
|
|
|
|
for (i = 0; i < network_data->output_num; i++) {
|
|
ni_network_layer_params_t *p_param = &network_data->linfo.out_param[i];
|
|
ctx->out_tensor[i] =
|
|
(uint8_t *)malloc(ni_ai_network_layer_dims(p_param) * sizeof(float));
|
|
if (ctx->out_tensor[i] == NULL) {
|
|
Error( "failed to allocate output tensor buffer");
|
|
return NIERROR(ENOMEM);
|
|
}
|
|
}
|
|
|
|
ctx->layers =static_cast<ni_roi_network_layer_t *>(
|
|
malloc(sizeof(ni_roi_network_layer_t) * network_data->output_num));
|
|
if (!ctx->layers) {
|
|
fprintf(stderr, "cannot allocate network layer memory\n");
|
|
return NIERROR(ENOMEM);
|
|
}
|
|
|
|
for (i = 0; i < network_data->output_num; i++) {
|
|
ctx->layers[i].width = network_data->linfo.out_param[i].sizes[0];
|
|
ctx->layers[i].height = network_data->linfo.out_param[i].sizes[1];
|
|
ctx->layers[i].channel = network_data->linfo.out_param[i].sizes[2];
|
|
ctx->layers[i].component = 3;
|
|
ctx->layers[i].classes =
|
|
(ctx->layers[i].channel / ctx->layers[i].component) -
|
|
(4 + 1);
|
|
ctx->layers[i].output_number =
|
|
ni_ai_network_layer_dims(&network_data->linfo.out_param[i]);
|
|
|
|
ctx->layers[i].output = (float *)ctx->out_tensor[i];
|
|
|
|
memcpy(ctx->layers[i].mask, &g_masks[i][0], sizeof(ctx->layers[i].mask));
|
|
|
|
ctx->layers[i].biases = (float *)malloc(BIASES_NUM * sizeof(float));
|
|
if (! ctx->layers[i].biases) {
|
|
Error("cannot allocate network layer memory");
|
|
return NIERROR(ENOMEM);
|
|
}
|
|
memcpy(ctx->layers[i].biases, &g_biases[0], BIASES_NUM * sizeof(float));
|
|
|
|
Debug(1, "network layer %d: w %d, h %d, ch %d, co %d, cl %d\n", i,
|
|
ctx->layers[i].width, ctx->layers[i].height,
|
|
ctx->layers[i].channel, ctx->layers[i].component,
|
|
ctx->layers[i].classes);
|
|
}
|
|
|
|
ctx->det_cache.dets_num = 0;
|
|
ctx->det_cache.capacity = 20;
|
|
ctx->det_cache.dets = static_cast<detection *>(malloc(sizeof(detection) * ctx->det_cache.capacity));
|
|
if (!ctx->det_cache.dets) {
|
|
Error("failed to allocate detection cache");
|
|
return NIERROR(ENOMEM);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Free the buffers owned by a model context: the per-layer anchor tables,
 * the layer array, the output tensor buffers and the detection cache.
 *
 * Each buffer is released under its own NULL check and the pointer is reset
 * afterwards. The layer array is therefore never dereferenced when it was
 * not allocated, and calling the function a second time frees nothing twice.
 */
static void destroy_yolov4_model(YoloModelCtx *ctx)
{
  int i;

  if (ctx->layers) {
    for (i = 0; i < ctx->output_number; i++) {
      free(ctx->layers[i].biases);
      ctx->layers[i].biases = NULL;
    }
    free(ctx->layers);
    ctx->layers = NULL;
  }

  if (ctx->out_tensor) {
    for (i = 0; i < ctx->output_number; i++) {
      free(ctx->out_tensor[i]);
      ctx->out_tensor[i] = NULL;
    }
    free(ctx->out_tensor);
    ctx->out_tensor = NULL;
  }

  /* Reset the bookkeeping too, so the cache never points at freed memory. */
  free(ctx->det_cache.dets);
  ctx->det_cache.dets = NULL;
  ctx->det_cache.dets_num = 0;
  ctx->det_cache.capacity = 0;
}
|
|
|
|
/* YOLOv4 model descriptor: binds the YoloModel callbacks to this file. */
YoloModel yolov4 = {
  .create_model = create_yolov4_model,    /* build the context */
  .destroy_model = destroy_yolov4_model,  /* free the context's buffers */
  .ni_get_boxes = ni_yolov4_get_boxes,    /* tensors -> image boxes */
};
|
|
|
|
#endif
|