Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

blur taking significant time on GPU

Here is the CPU version of the function:

// Finds the contours in the thresholded, blurred absolute difference of every
// pair of consecutive frames and appends the centre of each contour's bounding
// rectangle to contourCenter.
//
// framesToProcess  frames compared pairwise (i - 1 against i); with fewer than
//                  two frames nothing is appended.
// contourCenter    output; receives one mp(x, y, index) entry per contour,
//                  where index counts contours across all frame pairs from 0.
//
// Side effect: once per frame pair, prints the value of the externally defined
// timer t1 and restarts it.
void getContourCenters(vector<Mat>  &framesToProcess, vector<pointI>& contourCenter)
{
    size_t j = 0;
    // size_t counters: vector::size() is unsigned, so an int index would mix
    // signed and unsigned operands in the loop condition.
    for (size_t i = 1; i < framesToProcess.size(); i++)
    {
        Mat tempDifferenceImage, tempThresholdImage;
        vector< vector<Point> > contours;
        vector<Vec4i> hierarchy;

        absdiff(framesToProcess[i - 1], framesToProcess[i], tempDifferenceImage);
        threshold(tempDifferenceImage, tempThresholdImage, SENSITIVITY_VALUE, 255, THRESH_BINARY);
        // Blurred in place; findContours then runs on the blurred image.
        blur(tempThresholdImage, tempThresholdImage, Size(BLUR_SIZE, BLUR_SIZE));
        findContours(tempThresholdImage, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);

        // NOTE(review): t1 is restarted only here, so the printed value presumably
        // covers the whole pipeline above, not findContours alone - confirm.
        cout << "Time to findContours: " << t1.elapsed() << endl;
        t1.restart();

        for (size_t k = 0; k < contours.size(); ++k)
        {
            const Rect objectBoundingRectangle = boundingRect(contours[k]);
            int xpos = objectBoundingRectangle.x + objectBoundingRectangle.width / 2;
            int ypos = objectBoundingRectangle.y + objectBoundingRectangle.height / 2;
            contourCenter.push_back(mp(xpos, ypos, j++));
        }
    }
}

This function takes about 1.5 seconds to execute for 30 grayscale images. I then ported this code to the GPU:

  // GPU variant: runs absdiff, threshold and blur on the device for every pair
  // of consecutive frames, copies the result to a host Mat, finds contours on
  // the host and appends the centre of each contour's bounding rectangle to
  // contourCenter.
  //
  // framesToProcess  device frames compared pairwise (i - 1 against i); with
  //                  fewer than two frames nothing is appended.
  // contourCenter    output; receives one mp(x, y, index) entry per contour,
  //                  where index counts contours across all frame pairs from 0.
  void getContourCenters(vector<gpu::GpuMat>  &framesToProcess, vector<pointI>& contourCenter)
{
    size_t j = 0;

    // Declared once and reused as destinations on every iteration instead of
    // being constructed and destroyed per frame pair.
    gpu::GpuMat tempDifferenceImage, tempThresholdImage, tempBlurredImage;

    // size_t counters: vector::size() is unsigned, so an int index would mix
    // signed and unsigned operands in the loop condition.
    for (size_t i = 1; i < framesToProcess.size(); i++)
    {
        vector< vector<Point> > contours;
        vector<Vec4i> hierarchy;

        gpu::absdiff(framesToProcess[i - 1], framesToProcess[i], tempDifferenceImage);
        gpu::threshold(tempDifferenceImage, tempThresholdImage, SENSITIVITY_VALUE, 255, THRESH_BINARY);
        // Reported behaviour: without the gpu::blur call below the function runs in
        // about 0.5 seconds; with it the function takes more than 30 seconds.
        gpu::blur(tempThresholdImage, tempBlurredImage, Size(BLUR_SIZE, BLUR_SIZE));

        // Exactly one host copy may be declared; two declarations named contourImage
        // in this scope do not compile. To bypass the blur, comment out the
        // gpu::blur call and construct contourImage from tempThresholdImage instead.
        Mat contourImage(tempBlurredImage);
        findContours(contourImage, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);

        for (size_t k = 0; k < contours.size(); ++k)
        {
            const Rect objectBoundingRectangle = boundingRect(contours[k]);
            int xpos = objectBoundingRectangle.x + objectBoundingRectangle.width / 2;
            int ypos = objectBoundingRectangle.y + objectBoundingRectangle.height / 2;
            contourCenter.push_back(mp(xpos, ypos, j++));
        }
    }

}

The GPU version of the code takes about 0.5 seconds to execute when the call to gpu::blur is commented out, but with that line enabled it takes more than 30 seconds, sometimes longer (I don't have that much patience, so I kill the process). Can anyone point out what the problem is with this code? Thank you in advance.

blur taking significant time on GPU

Here is the CPU version of the function:

// Finds the contours in the thresholded, blurred absolute difference of every
// pair of consecutive frames and appends the centre of each contour's bounding
// rectangle to contourCenter.
//
// framesToProcess  frames compared pairwise (i - 1 against i); with fewer than
//                  two frames nothing is appended.
// contourCenter    output; receives one mp(x, y, index) entry per contour,
//                  where index counts contours across all frame pairs from 0.
//
// Side effect: once per frame pair, prints the value of the externally defined
// timer t1 and restarts it.
void getContourCenters(vector<Mat>  &framesToProcess, vector<pointI>& contourCenter)
{
    size_t j = 0;
    // size_t counters: vector::size() is unsigned, so an int index would mix
    // signed and unsigned operands in the loop condition.
    for (size_t i = 1; i < framesToProcess.size(); i++)
    {
        Mat tempDifferenceImage, tempThresholdImage;
        vector< vector<Point> > contours;
        vector<Vec4i> hierarchy;

        absdiff(framesToProcess[i - 1], framesToProcess[i], tempDifferenceImage);
        threshold(tempDifferenceImage, tempThresholdImage, SENSITIVITY_VALUE, 255, THRESH_BINARY);
        // Blurred in place; findContours then runs on the blurred image.
        blur(tempThresholdImage, tempThresholdImage, Size(BLUR_SIZE, BLUR_SIZE));
        findContours(tempThresholdImage, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);

        // NOTE(review): t1 is restarted only here, so the printed value presumably
        // covers the whole pipeline above, not findContours alone - confirm.
        cout << "Time to findContours: " << t1.elapsed() << endl;
        t1.restart();

        for (size_t k = 0; k < contours.size(); ++k)
        {
            const Rect objectBoundingRectangle = boundingRect(contours[k]);
            int xpos = objectBoundingRectangle.x + objectBoundingRectangle.width / 2;
            int ypos = objectBoundingRectangle.y + objectBoundingRectangle.height / 2;
            contourCenter.push_back(mp(xpos, ypos, j++));
        }
    }
}

This function takes about 1.5 seconds to execute for 30 grayscale images. I then ported this code to the GPU:

  // GPU variant: runs absdiff, threshold and blur on the device for every pair
  // of consecutive frames, copies the result to a host Mat, finds contours on
  // the host and appends the centre of each contour's bounding rectangle to
  // contourCenter.
  //
  // framesToProcess  device frames compared pairwise (i - 1 against i); with
  //                  fewer than two frames nothing is appended.
  // contourCenter    output; receives one mp(x, y, index) entry per contour,
  //                  where index counts contours across all frame pairs from 0.
  void getContourCenters(vector<gpu::GpuMat>  &framesToProcess, vector<pointI>& contourCenter)
{
    size_t j = 0;

    // Declared once and reused as destinations on every iteration instead of
    // being constructed and destroyed per frame pair.
    gpu::GpuMat tempDifferenceImage, tempThresholdImage, tempBlurredImage;

    // size_t counters: vector::size() is unsigned, so an int index would mix
    // signed and unsigned operands in the loop condition.
    for (size_t i = 1; i < framesToProcess.size(); i++)
    {
        vector< vector<Point> > contours;
        vector<Vec4i> hierarchy;

        gpu::absdiff(framesToProcess[i - 1], framesToProcess[i], tempDifferenceImage);
        gpu::threshold(tempDifferenceImage, tempThresholdImage, SENSITIVITY_VALUE, 255, THRESH_BINARY);
        // Reported behaviour: without the gpu::blur call below the function runs in
        // about 0.5 seconds; with it the function takes more than 30 seconds.
        gpu::blur(tempThresholdImage, tempBlurredImage, Size(BLUR_SIZE, BLUR_SIZE));

        // Exactly one host copy may be declared; two declarations named contourImage
        // in this scope do not compile. To bypass the blur, comment out the
        // gpu::blur call and construct contourImage from tempThresholdImage instead.
        Mat contourImage(tempBlurredImage);
        findContours(contourImage, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);

        for (size_t k = 0; k < contours.size(); ++k)
        {
            const Rect objectBoundingRectangle = boundingRect(contours[k]);
            int xpos = objectBoundingRectangle.x + objectBoundingRectangle.width / 2;
            int ypos = objectBoundingRectangle.y + objectBoundingRectangle.height / 2;
            contourCenter.push_back(mp(xpos, ypos, j++));
        }
    }

}

The GPU version of the code takes about 0.5 seconds to execute when the call to gpu::blur is commented out, but with that line enabled it takes more than 30 seconds, sometimes longer (I don't have that much patience, so I kill the process). Can anyone point out what the problem is with this code? Thank you in advance.