Although pklab's answer is correct, I still want to add some comments (and because comment length is limited, I put them in an answer instead). Firstly, parallel techniques require extra work to reach a performance goal (high speed), so they should only be applied where performance is a must and the computation is heavy. Secondly, your benchmark uses only small images captured from a camera, and you did not mention their resolution, so it may not be a fair comparison. Here are my results (on a dual-core 2 x 2.4 GHz machine, 64-bit Windows, OpenCV 3, VS 2013):

30% CPU used
Video width: 640
Video height: 480
Frame count: 200
Parallel:8.66ms Sequential:13.47ms Overhead:0.6ms

With a big video file:

34% CPU used
Video width: 1280
Video height: 720
Frame count: 200
Parallel:17.64ms Sequential:25.76ms Overhead:0.21ms
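
Working out the numbers: the two-thread version gives a speedup of roughly 13.47 / 8.66 ≈ 1.56x at 640x480 and 25.76 / 17.64 ≈ 1.46x at 1280x720, while the measured per-frame cost of creating the two threads stays well under 1 ms in both cases, so even this naive thread-per-frame approach pays off for these filter chains.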

Finally, I rewrote @pklab's code to use Mat references (instead of Mat pointers); it can be run with either a video file or a camera, as follows:

#include <thread>
#include <iostream>
#include <string>
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;

// here we use canny 
void Algo1(const cv::Mat &src, cv::Mat &dst)
{
    cvtColor(src, dst, CV_BGR2GRAY);
    GaussianBlur(dst, dst, Size(7, 7), 1.5, 1.5);
    Canny(dst, dst, 0, 30, 3);
}

// here we use morphology gradient
void Algo2(const cv::Mat &src, cv::Mat & dst)
{
    int morph_size = 1;
    cv::Size sz(2 * morph_size + 1, 2 * morph_size + 1);
    cv::Point anchor(morph_size, morph_size);
    Mat element = getStructuringElement(MORPH_RECT, sz, anchor);
    morphologyEx(src, dst, MORPH_GRADIENT, element);
}

// empty function to measure overhead
void Test()
{
    return;
}

int main(int argc, char * argv[])
{
    if (argc < 2)            // expect "0" for the default camera or a video file path
        return -1;
    VideoCapture cap;
    if (0 == string("0").compare(string(argv[1])))
        cap.open(0);         // open the default camera
    else
        cap.open(argv[1]);   // open video file
    if (!cap.isOpened())     // check if we succeeded
        return -1;
    cout << "Video width: " << cap.get(CV_CAP_PROP_FRAME_WIDTH) << endl;
    cout << "Video height: " << cap.get(CV_CAP_PROP_FRAME_HEIGHT) << endl;
    clock_t  parallel = 0, sequential = 0, testParallel = 0, testSequential = 0;
    clock_t start, stop;
    int cnt = 0;
    for (;cnt<200;)
    {
        Mat src, dst1, dst2;
        cap >> src;          // get a new frame from the camera or file
        if (src.empty())     // end of the video file
            break;
        imshow("src", src);

        //Try it with sequential way
        start = clock();
        Algo1(src, dst1);
        Algo2(src, dst2);
        stop = clock();
        sequential += (stop - start);

        imshow("Sequential Algo1", dst1);
        imshow("Sequential Algo2", dst2);

        // try simple parallel processing way
        // (std::thread copies its arguments, so the Mats must be wrapped in
        //  std::ref/std::cref to be passed by reference into Algo1/Algo2)
        start = clock();
        std::thread th1(Algo1, std::cref(src), std::ref(dst1));
        std::thread th2(Algo2, std::cref(src), std::ref(dst2));
        th1.join();
        th2.join();
        stop = clock();
        parallel += (stop - start);

        imshow("Parallel Algo1", dst1);
        imshow("Parallel Algo2", dst2);

        // measure threading overhead (2 calls)
        start = clock();
        Test();
        Test();
        stop = clock();
        testSequential += (stop - start);

        start = clock();
        std::thread thTest1(&Test);
        std::thread thTest2(&Test);
        thTest1.join();
        thTest2.join();
        stop = clock();
        testParallel += (stop - start);

        cnt++;

        if (waitKey(30) >= 0)
            break;
    }

    double parTime = 1000.0*parallel / cnt / (double)CLOCKS_PER_SEC;
    double seqTime = 1000.0*sequential / cnt / (double)CLOCKS_PER_SEC;
    double overHead = 1000.0*(testParallel - testSequential) / cnt / (double)CLOCKS_PER_SEC;

    std::cout << std::endl << "Average processing time (2 calls):" << std::endl
        << "Parallel: " << parTime << "ms"
        << "\t Sequential: " << seqTime << "ms"
        << "\t Overhead: " << overHead << "ms"
        << "";

    std::cout << std::endl << "Press Enter to terminate ";
    std::cin.get();
}
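
To run the program, pass 0 as the first argument for the default camera or a path to a video file (for example parallel_test 0 or parallel_test myvideo.avi, where parallel_test is whatever you name the executable).

One caveat on the timing: clock() measures wall-clock time with MSVC but CPU time on POSIX systems, so on Linux the parallel loop would actually look slower than it is. If you want a portable measurement, OpenCV's tick counter returns wall time everywhere; here is a minimal sketch (it assumes the Algo1/Algo2 functions from the listing above):

    #include <opencv2/opencv.hpp>

    // Wall-clock timing of one frame using cv::getTickCount(), portable across OSes.
    // Assumes Algo1() and Algo2() are the functions defined in the listing above.
    double timeOneFrameMs(const cv::Mat &src, cv::Mat &dst1, cv::Mat &dst2)
    {
        double t0 = (double)cv::getTickCount();
        Algo1(src, dst1);
        Algo2(src, dst2);
        return ((double)cv::getTickCount() - t0) * 1000.0 / cv::getTickFrequency();
    }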