diff --git a/src/zm_zone.cpp b/src/zm_zone.cpp index 95dbb4f72..9d8ce354a 100644 --- a/src/zm_zone.cpp +++ b/src/zm_zone.cpp @@ -1033,217 +1033,3 @@ void Zone::std_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsig *pixel_count = pixelsalarmed; *pixel_sum = pixelsdifference; } - -__attribute__((noinline,__target__("sse2"))) void Zone::sse2_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsigned int* pixel_count, unsigned int* pixel_sum) { -#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE)) - __attribute__((aligned(16))) static uint8_t calc_maxpthreshold[16] = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127}; - __attribute__((aligned(16))) static uint8_t calc_minpthreshold[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - static uint8_t current_minpthreshold = 0; - static uint8_t current_maxpthreshold = 255; - unsigned int minpthreshold = min_pixel_threshold; - unsigned int maxpthreshold = max_pixel_threshold; - uint32_t pixelsalarmed = 0; - uint32_t pixelsdifference = 0; - unsigned int lo_y = polygon.LoY(); - unsigned int hi_y = polygon.HiY()+1; - unsigned int lo_x = polygon.LoX(); - unsigned int hi_x = polygon.HiX()+1; - - if(maxpthreshold == 0) - maxpthreshold = 255; - - if(minpthreshold != current_minpthreshold) { - for(unsigned int i=0;i>1; - current_minpthreshold = minpthreshold; - } - - if(maxpthreshold != current_maxpthreshold) { - for(unsigned int i=0;i>1; - current_maxpthreshold = maxpthreshold; - } - - /* - * We have to work on 16 byte aligned addresses. - * Assume width is multiples of 16 and align the lo_x and hi_x to be on a 16 byte boundary - */ - if((lo_x % 16) != 0) { - lo_x = lo_x - (lo_x % 16); - } - if((hi_x % 16) != 0) { - hi_x = hi_x + (16 - (hi_x % 16)); - if( hi_x > pdiff_image->Width() ) - /* Clamp hi_x to width */ - hi_x = pdiff_image->Width(); - } - if( hi_y > pdiff_image->Height() ) { - /* Clamp hi y to height */ - hi_y = pdiff_image->Height(); - } - - unsigned int x = lo_x; - unsigned int y = lo_y; - unsigned long xgap = hi_x - lo_x; - unsigned long width = pdiff_image->Width(); - uint8_t* pdiff = (uint8_t*)pdiff_image->Buffer(lo_x, lo_y); - const uint8_t* ppoly = ppoly_image->Buffer(lo_x, lo_y); - - /* Some sanity checks */ - if((width % 16) != 0) { - Fatal("Image width is not multiples of 16!"); - } - if((xgap % 16) != 0) { - /* Shouldn't happen but just in case */ - Fatal("Difference between calculated hi_x and lo_x is not multiples of 16"); - } - if(lo_x == hi_x) { - Error("lo_x and hi_x are identical, nothing to scan. Scanning the whole line instead"); - lo_x = 0; - hi_x = width; - } - - - /* XMM0,1,2,3 - General purpose */ - /* XMM4 - alarmed pixels count */ - /* XMM5 - difference accumulator */ - /* XMM6 - min pixel threshold mask */ - /* XMM7 - max pixel threshold mask */ - /* XMM8 - divide mask */ - /* XMM9 - 0x01 mask */ - /* - Register map: - %0 - pixelsalarmed - %1 - pixelsdifference - %2 - pdiff - %3 - ppoly - %4 - X - %5 - Y - %6 - min pixel mask - %7 - max pixel mask - %8 - lo_y - %9 - lo_x - %10 - hi_x - %11 - hi_y - %12 - width - %13 - xgap - */ - - /* Initial setup - * set X to lo_x - * set Y to lo_y - * set pdiff to pdiff start + (width * lo_y) + lo_x - * set ppoly to ppoly start + (width * lo_y) + lo_x - * set xgap to hi_x - lo_x - */ - - __asm__ __volatile__ ( - "pxor %%xmm4, %%xmm4\n\t" // Zero out the alarmed pixels count - "pxor %%xmm5, %%xmm5\n\t" // Zero out the difference accumulator - "movdqa %6, %%xmm6\n\t" // Load the min pixel threshold (divided by 2) - "movdqa %7, %%xmm7\n\t" // Load the max pixel threshold (divided by 2) -#if defined(__x86_64__) - "mov $0x7F7F7F7F, %%eax\n\t" /* Divide mask */ - "movd %%eax, %%xmm8\n\t" - "pshufd $0x0, %%xmm8, %%xmm8\n\t" - "mov $0x01010101, %%eax\n\t" /* 0x1 mask */ - "movd %%eax, %%xmm9\n\t" - "pshufd $0x0, %%xmm9, %%xmm9\n\t" -#endif - /* Iteration start */ - "sse2_ap_iter:\n\t" - "movdqa (%2), %%xmm0\n\t" // Load the pdiff - "movdqa (%3), %%xmm1\n\t" // Load the ppoly - - "pand %%xmm0, %%xmm1\n\t" // Filter out pixels not inside polygon. Result stored on XMM1 - "movdqa %%xmm1, %%xmm2\n\t" // Move the result into XMM2 - "psrlq $0x1, %%xmm2\n\t" // Divide the result by 2 (part 1) -#if defined(__x86_64__) - "pand %%xmm8, %%xmm2\n\t" // Divide the result by 2 (part 2) -#else - "mov $0x7F7F7F7F, %%eax\n\t" - "movd %%eax, %%xmm0\n\t" - "pshufd $0x0, %%xmm0, %%xmm0\n\t" - "pand %%xmm0, %%xmm2\n\t" // Divide the result by 2 (part 2) -#endif - - /* Filter out pixels bigger than max threshold and update XMM0 */ - "movdqa %%xmm7, %%xmm0\n\t" // Copy max threshold to XMM0 - "pcmpgtb %%xmm2, %%xmm0\n\t" // Filter out pixels bigger than max threshold, result stored on XMM0 - "pand %%xmm2, %%xmm0\n\t" // XMM0 = Dividied pixels that are in poly and meet maximum threshold - - /* Filter out pixels smaller than min threshold */ - "pcmpgtb %%xmm6, %%xmm0\n\t" // Filter out pixels smaller than min threshold, result stored on XMM0 - - /* Write white or black depending if pixel is alarmed or not */ - "movntdq %%xmm0, (%2)\n\t" // Set the pixel to white or black depending on the result - - /* Update the alarmed pixels count */ -#if defined(__x86_64__) - "movdqa %%xmm9, %%xmm3\n\t" // Move 0x01 mask to XMM3 -#else - "mov $0x01010101, %%eax\n\t" - "movd %%eax, %%xmm3\n\t" - "pshufd $0x0, %%xmm3, %%xmm3\n\t" -#endif - "pxor %%xmm2, %%xmm2\n\t" // Set XMM2 to zeros - - "pand %%xmm0, %%xmm3\n\t" // Set alarmed pixels to 1 in XMM3 - "psadbw %%xmm2, %%xmm3\n\t" // DEST[0-15] and DEST[64-79] contain the results - "paddd %%xmm3, %%xmm4\n\t" // Update the alarmed pixels count - - /* Update XMM0 to contain pixels in poly that meet min and max thresholds */ - "pand %%xmm1, %%xmm0\n\t" // XMM0 = Pixels in poly that meet min and max thresholds - - /* Update the difference accumulator */ - "psadbw %%xmm0, %%xmm2\n\t" // DEST[0-15] and DEST[64-79] contain the results - "paddd %%xmm2, %%xmm5\n\t" // Update the difference accumulator - - /* Move to the next pixels in the row */ - "add $0x10, %2\n\t" // Add 16 to pdiff - "add $0x10, %3\n\t" // Add 16 to ppoly - "add $0x10, %4\n\t" // Add 16 to X - "cmp %10, %4\n\t" // Check if we reached max X - "jb sse2_ap_iter\n\t" // Go for another iteration - - "sub %13, %2\n\t" // Reset pdiff to low X - "sub %13, %3\n\t" // Reset ppoly to low X - "sub %13, %4\n\t" // Reset X to low x - - "add $0x1, %5\n\t" // Increment Y to advance to the next line - "add %12, %2\n\t" // Move pdiff to the next row - "add %12, %3\n\t" // Move ppoly to the next row - - "cmp %11, %5\n\t" // Check if we reached max Y - "jb sse2_ap_iter\n\t" // Go for another iteration - - /* Calculate the alarmed pixels */ - "pshufd $0x56, %%xmm4, %%xmm0\n\t" - "paddd %%xmm4, %%xmm0\n\t" - "movd %%xmm0, %0\n\t" - - /* Calculate the pixels difference */ - "pshufd $0x56, %%xmm5, %%xmm1\n\t" - "paddd %%xmm5, %%xmm1\n\t" - "movd %%xmm1, %1\n\t" - - : "=m" (pixelsalarmed), "=m" (pixelsdifference) -#if (defined(_DEBUG) && !defined(__x86_64__)) /* Use one less register to allow compilation to success on 32bit with omit frame pointer disabled */ - : "r" (pdiff), "r" (ppoly), "r" (x), "r" (y), "m" (*calc_minpthreshold), "m" (*calc_maxpthreshold), "m" (lo_y), "m" (lo_x), "m" (hi_x), "m" (hi_y), "m" (width), "m" (xgap) -#else - : "r" (pdiff), "r" (ppoly), "r" (x), "r" (y), "m" (*calc_minpthreshold), "m" (*calc_maxpthreshold), "m" (lo_y), "m" (lo_x), "r" (hi_x), "m" (hi_y), "m" (width), "m" (xgap) -#endif -#if defined(__x86_64__) - : "%rax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "cc", "memory" -#else - : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory" -#endif - ); - - /* Store the results */ - *pixel_count = pixelsalarmed; - *pixel_sum = pixelsdifference; -#else - Panic("SSE function called on a non x86\\x86-64 platform"); -#endif -} diff --git a/src/zm_zone.h b/src/zm_zone.h index ae247ada4..6aa45e13d 100644 --- a/src/zm_zone.h +++ b/src/zm_zone.h @@ -95,7 +95,6 @@ protected: protected: void Setup( Monitor *p_monitor, int p_id, const char *p_label, ZoneType p_type, const Polygon &p_polygon, const Rgb p_alarm_rgb, CheckMethod p_check_method, int p_min_pixel_threshold, int p_max_pixel_threshold, int p_min_alarm_pixels, int p_max_alarm_pixels, const Coord &p_filter_box, int p_min_filter_pixels, int p_max_filter_pixels, int p_min_blob_pixels, int p_max_blob_pixels, int p_min_blobs, int p_max_blobs, int p_overload_frames ); void std_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsigned int* pixel_count, unsigned int* pixel_sum); - void sse2_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsigned int* pixel_count, unsigned int* pixel_sum); public: Zone( Monitor *p_monitor, int p_id, const char *p_label, ZoneType p_type, const Polygon &p_polygon, const Rgb p_alarm_rgb, CheckMethod p_check_method, int p_min_pixel_threshold=15, int p_max_pixel_threshold=0, int p_min_alarm_pixels=50, int p_max_alarm_pixels=75000, const Coord &p_filter_box=Coord( 3, 3 ), int p_min_filter_pixels=50, int p_max_filter_pixels=50000, int p_min_blob_pixels=10, int p_max_blob_pixels=0, int p_min_blobs=0, int p_max_blobs=0, int p_overload_frames=0 )