Removed the now unused SSE2 AlarmedPixels code.

This commit is contained in:
Kfir Itzhak
2011-10-27 17:50:39 +02:00
parent d7e9c6ad5c
commit 8e9ccfe1ec
2 changed files with 0 additions and 215 deletions

View File

@@ -1033,217 +1033,3 @@ void Zone::std_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsig
*pixel_count = pixelsalarmed;
*pixel_sum = pixelsdifference;
}
__attribute__((noinline,__target__("sse2"))) void Zone::sse2_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsigned int* pixel_count, unsigned int* pixel_sum) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__attribute__((aligned(16))) static uint8_t calc_maxpthreshold[16] = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127};
__attribute__((aligned(16))) static uint8_t calc_minpthreshold[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
static uint8_t current_minpthreshold = 0;
static uint8_t current_maxpthreshold = 255;
unsigned int minpthreshold = min_pixel_threshold;
unsigned int maxpthreshold = max_pixel_threshold;
uint32_t pixelsalarmed = 0;
uint32_t pixelsdifference = 0;
unsigned int lo_y = polygon.LoY();
unsigned int hi_y = polygon.HiY()+1;
unsigned int lo_x = polygon.LoX();
unsigned int hi_x = polygon.HiX()+1;
if(maxpthreshold == 0)
maxpthreshold = 255;
if(minpthreshold != current_minpthreshold) {
for(unsigned int i=0;i<sizeof(calc_minpthreshold);i++)
calc_minpthreshold[i] = minpthreshold>>1;
current_minpthreshold = minpthreshold;
}
if(maxpthreshold != current_maxpthreshold) {
for(unsigned int i=0;i<sizeof(calc_maxpthreshold);i++)
calc_maxpthreshold[i] = maxpthreshold>>1;
current_maxpthreshold = maxpthreshold;
}
/*
* We have to work on 16 byte aligned addresses.
* Assume width is multiples of 16 and align the lo_x and hi_x to be on a 16 byte boundary
*/
if((lo_x % 16) != 0) {
lo_x = lo_x - (lo_x % 16);
}
if((hi_x % 16) != 0) {
hi_x = hi_x + (16 - (hi_x % 16));
if( hi_x > pdiff_image->Width() )
/* Clamp hi_x to width */
hi_x = pdiff_image->Width();
}
if( hi_y > pdiff_image->Height() ) {
/* Clamp hi y to height */
hi_y = pdiff_image->Height();
}
unsigned int x = lo_x;
unsigned int y = lo_y;
unsigned long xgap = hi_x - lo_x;
unsigned long width = pdiff_image->Width();
uint8_t* pdiff = (uint8_t*)pdiff_image->Buffer(lo_x, lo_y);
const uint8_t* ppoly = ppoly_image->Buffer(lo_x, lo_y);
/* Some sanity checks */
if((width % 16) != 0) {
Fatal("Image width is not multiples of 16!");
}
if((xgap % 16) != 0) {
/* Shouldn't happen but just in case */
Fatal("Difference between calculated hi_x and lo_x is not multiples of 16");
}
if(lo_x == hi_x) {
Error("lo_x and hi_x are identical, nothing to scan. Scanning the whole line instead");
lo_x = 0;
hi_x = width;
}
/* XMM0,1,2,3 - General purpose */
/* XMM4 - alarmed pixels count */
/* XMM5 - difference accumulator */
/* XMM6 - min pixel threshold mask */
/* XMM7 - max pixel threshold mask */
/* XMM8 - divide mask */
/* XMM9 - 0x01 mask */
/*
Register map:
%0 - pixelsalarmed
%1 - pixelsdifference
%2 - pdiff
%3 - ppoly
%4 - X
%5 - Y
%6 - min pixel mask
%7 - max pixel mask
%8 - lo_y
%9 - lo_x
%10 - hi_x
%11 - hi_y
%12 - width
%13 - xgap
*/
/* Initial setup
* set X to lo_x
* set Y to lo_y
* set pdiff to pdiff start + (width * lo_y) + lo_x
* set ppoly to ppoly start + (width * lo_y) + lo_x
* set xgap to hi_x - lo_x
*/
__asm__ __volatile__ (
"pxor %%xmm4, %%xmm4\n\t" // Zero out the alarmed pixels count
"pxor %%xmm5, %%xmm5\n\t" // Zero out the difference accumulator
"movdqa %6, %%xmm6\n\t" // Load the min pixel threshold (divided by 2)
"movdqa %7, %%xmm7\n\t" // Load the max pixel threshold (divided by 2)
#if defined(__x86_64__)
"mov $0x7F7F7F7F, %%eax\n\t" /* Divide mask */
"movd %%eax, %%xmm8\n\t"
"pshufd $0x0, %%xmm8, %%xmm8\n\t"
"mov $0x01010101, %%eax\n\t" /* 0x1 mask */
"movd %%eax, %%xmm9\n\t"
"pshufd $0x0, %%xmm9, %%xmm9\n\t"
#endif
/* Iteration start */
"sse2_ap_iter:\n\t"
"movdqa (%2), %%xmm0\n\t" // Load the pdiff
"movdqa (%3), %%xmm1\n\t" // Load the ppoly
"pand %%xmm0, %%xmm1\n\t" // Filter out pixels not inside polygon. Result stored on XMM1
"movdqa %%xmm1, %%xmm2\n\t" // Move the result into XMM2
"psrlq $0x1, %%xmm2\n\t" // Divide the result by 2 (part 1)
#if defined(__x86_64__)
"pand %%xmm8, %%xmm2\n\t" // Divide the result by 2 (part 2)
#else
"mov $0x7F7F7F7F, %%eax\n\t"
"movd %%eax, %%xmm0\n\t"
"pshufd $0x0, %%xmm0, %%xmm0\n\t"
"pand %%xmm0, %%xmm2\n\t" // Divide the result by 2 (part 2)
#endif
/* Filter out pixels bigger than max threshold and update XMM0 */
"movdqa %%xmm7, %%xmm0\n\t" // Copy max threshold to XMM0
"pcmpgtb %%xmm2, %%xmm0\n\t" // Filter out pixels bigger than max threshold, result stored on XMM0
"pand %%xmm2, %%xmm0\n\t" // XMM0 = Dividied pixels that are in poly and meet maximum threshold
/* Filter out pixels smaller than min threshold */
"pcmpgtb %%xmm6, %%xmm0\n\t" // Filter out pixels smaller than min threshold, result stored on XMM0
/* Write white or black depending if pixel is alarmed or not */
"movntdq %%xmm0, (%2)\n\t" // Set the pixel to white or black depending on the result
/* Update the alarmed pixels count */
#if defined(__x86_64__)
"movdqa %%xmm9, %%xmm3\n\t" // Move 0x01 mask to XMM3
#else
"mov $0x01010101, %%eax\n\t"
"movd %%eax, %%xmm3\n\t"
"pshufd $0x0, %%xmm3, %%xmm3\n\t"
#endif
"pxor %%xmm2, %%xmm2\n\t" // Set XMM2 to zeros
"pand %%xmm0, %%xmm3\n\t" // Set alarmed pixels to 1 in XMM3
"psadbw %%xmm2, %%xmm3\n\t" // DEST[0-15] and DEST[64-79] contain the results
"paddd %%xmm3, %%xmm4\n\t" // Update the alarmed pixels count
/* Update XMM0 to contain pixels in poly that meet min and max thresholds */
"pand %%xmm1, %%xmm0\n\t" // XMM0 = Pixels in poly that meet min and max thresholds
/* Update the difference accumulator */
"psadbw %%xmm0, %%xmm2\n\t" // DEST[0-15] and DEST[64-79] contain the results
"paddd %%xmm2, %%xmm5\n\t" // Update the difference accumulator
/* Move to the next pixels in the row */
"add $0x10, %2\n\t" // Add 16 to pdiff
"add $0x10, %3\n\t" // Add 16 to ppoly
"add $0x10, %4\n\t" // Add 16 to X
"cmp %10, %4\n\t" // Check if we reached max X
"jb sse2_ap_iter\n\t" // Go for another iteration
"sub %13, %2\n\t" // Reset pdiff to low X
"sub %13, %3\n\t" // Reset ppoly to low X
"sub %13, %4\n\t" // Reset X to low x
"add $0x1, %5\n\t" // Increment Y to advance to the next line
"add %12, %2\n\t" // Move pdiff to the next row
"add %12, %3\n\t" // Move ppoly to the next row
"cmp %11, %5\n\t" // Check if we reached max Y
"jb sse2_ap_iter\n\t" // Go for another iteration
/* Calculate the alarmed pixels */
"pshufd $0x56, %%xmm4, %%xmm0\n\t"
"paddd %%xmm4, %%xmm0\n\t"
"movd %%xmm0, %0\n\t"
/* Calculate the pixels difference */
"pshufd $0x56, %%xmm5, %%xmm1\n\t"
"paddd %%xmm5, %%xmm1\n\t"
"movd %%xmm1, %1\n\t"
: "=m" (pixelsalarmed), "=m" (pixelsdifference)
#if (defined(_DEBUG) && !defined(__x86_64__)) /* Use one less register to allow compilation to success on 32bit with omit frame pointer disabled */
: "r" (pdiff), "r" (ppoly), "r" (x), "r" (y), "m" (*calc_minpthreshold), "m" (*calc_maxpthreshold), "m" (lo_y), "m" (lo_x), "m" (hi_x), "m" (hi_y), "m" (width), "m" (xgap)
#else
: "r" (pdiff), "r" (ppoly), "r" (x), "r" (y), "m" (*calc_minpthreshold), "m" (*calc_maxpthreshold), "m" (lo_y), "m" (lo_x), "r" (hi_x), "m" (hi_y), "m" (width), "m" (xgap)
#endif
#if defined(__x86_64__)
: "%rax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "cc", "memory"
#else
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
#endif
);
/* Store the results */
*pixel_count = pixelsalarmed;
*pixel_sum = pixelsdifference;
#else
Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}

View File

@@ -95,7 +95,6 @@ protected:
protected:
void Setup( Monitor *p_monitor, int p_id, const char *p_label, ZoneType p_type, const Polygon &p_polygon, const Rgb p_alarm_rgb, CheckMethod p_check_method, int p_min_pixel_threshold, int p_max_pixel_threshold, int p_min_alarm_pixels, int p_max_alarm_pixels, const Coord &p_filter_box, int p_min_filter_pixels, int p_max_filter_pixels, int p_min_blob_pixels, int p_max_blob_pixels, int p_min_blobs, int p_max_blobs, int p_overload_frames );
void std_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsigned int* pixel_count, unsigned int* pixel_sum);
void sse2_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsigned int* pixel_count, unsigned int* pixel_sum);
public:
Zone( Monitor *p_monitor, int p_id, const char *p_label, ZoneType p_type, const Polygon &p_polygon, const Rgb p_alarm_rgb, CheckMethod p_check_method, int p_min_pixel_threshold=15, int p_max_pixel_threshold=0, int p_min_alarm_pixels=50, int p_max_alarm_pixels=75000, const Coord &p_filter_box=Coord( 3, 3 ), int p_min_filter_pixels=50, int p_max_filter_pixels=50000, int p_min_blob_pixels=10, int p_max_blob_pixels=0, int p_min_blobs=0, int p_max_blobs=0, int p_overload_frames=0 )