Hi,
I have a tracking application with two main parts: (1) the tracking algorithm and (2) the video overlay.
A lot of content needs to be overlaid, and that takes a lot of time. I was thinking of parallelizing the two parts using OpenMP with minimal effort, so I thought of using the
sections directive available in OpenMP. The following code is just a crude form of what I am trying to achieve:
#include "opencv2\highgui\highgui.hpp"
#include "opencv2\core\core.hpp"
#include "opencv2\imgproc\imgproc.hpp"
#include <iostream>
#include <omp.h>
#include "Timer.h"
using namespace std;
using namespace cv;
int main()
{
VideoCapture cap(0); //start the webcam
Mat frame, roi;
Timer t; //timer class
int frameNo = 0;
double summ = 0;
while (true)
{
cap >> frame;
frameNo++;
roi = frame(Rect(100, 100, 300, 300)).clone(); //extract a deep copy of region of interest; for tracking purposes
t.start(); //start the timer
#pragma omp parallel sections
{
#pragma omp section //first section: tracking algorithm
{
//some tracking algorithm below which uses only "roi" variable
GaussianBlur(roi, roi, Size(5, 5), 0, 0, BORDER_REPLICATE);
}
#pragma omp section //second section: overlay video
{
//a lot of overlay in different video parts which uses only "frame" variable
putText(frame, "string 1", Point(10, 10), 1, 1, Scalar(1));
putText(frame, "string 2", Point(20, 20), 1, 1, Scalar(1));
putText(frame, "string 3", Point(30, 30), 1, 1, Scalar(1));
putText(frame, "string 4", Point(40, 40), 1, 1, Scalar(1));
putText(frame, "string 5", Point(50, 50), 1, 1, Scalar(1));
putText(frame, "string 6", Point(60, 60), 1, 1, Scalar(1));
putText(frame, "string 7", Point(70, 70), 1, 1, Scalar(1));
putText(frame, "string 8", Point(80, 80), 1, 1, Scalar(1));
putText(frame, "string 9", Point(90, 90), 1, 1, Scalar(1));
putText(frame, "string 10", Point(100, 100), 1, 1, Scalar(1));
}
}
t.stop(); //stop the timer
summ += t.getElapsedTimeInMilliSec();
if (frameNo % 10 == 0) //average total time over 10 frames
{
cout << summ / 10 << endl;
summ = 0;
}
imshow("frame", frame);
if (waitKey(10) == 27)
break;
}
return 0;
}
I don't see a performance boost in my timing analysis, and in some cases the timing with OpenMP gets worse, even though I am using different variables in my sections.
My question is: am I using the right approach (the sections directive) for my case, or is there a better way to parallelize my existing code using OpenMP with minimal effort?
Thanks.