1 | initial version |
You could use threads just to run your algorithms, but don't expect faster performance because:
Below is simple example, here I'm comparing sequential vs parallel implementation.
I'm showing how to apply 2 different algorithms over the same frame, using 2 sequential calls and simple threading. The example below suffers from a poor threading implementation because thread construction introduces high overhead.
On my computer, results show that the sequential way is about 2 times faster than simple threading!
#include <thread>
// Edge detection: BGR -> grayscale -> Gaussian smoothing -> Canny edges.
void Algo1(const cv::Mat &src, cv::Mat *dst)
{
    const cv::Size blurKernel(7, 7);
    const double blurSigma = 1.5;
    cv::cvtColor(src, *dst, CV_BGR2GRAY);
    cv::GaussianBlur(*dst, *dst, blurKernel, blurSigma, blurSigma);
    cv::Canny(*dst, *dst, 0, 30, 3);
}
// Morphological gradient with a 3x3 rectangular structuring element.
void Algo2(const cv::Mat &src, cv::Mat *dst)
{
    const int radius = 1;
    const cv::Size kernelSize(2 * radius + 1, 2 * radius + 1);
    const cv::Point kernelAnchor(radius, radius);
    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, kernelSize, kernelAnchor);
    cv::morphologyEx(src, *dst, cv::MORPH_GRADIENT, kernel);
}
int main()
{
VideoCapture cap(-1); // open the default camera
if (!cap.isOpened()) // check if we succeeded
return -1;
clock_t parallel = 0, sequential = 0;
clock_t start, stop;
int cnt=0;
for (;;)
{
Mat src,dst1,dst2;
cap >> src; // get a new frame from camera
imshow("src", src);
//Try it with sequential way
start = clock();
Algo1(src, &dst1);
Algo2(src, &dst2);
stop = clock();
sequential += (stop - start);
imshow("Sequential Algo1", dst1);
imshow("Sequential Algo2", dst2);
// try simple parallel processing way
start = clock();
std::thread th1(&Algo1, src, &dst1);
std::thread th2(&Algo2, src, &dst2);
th1.join();
th2.join();
stop = clock();
parallel += (stop - start);
imshow("Paralllel Algo1", dst1);
imshow("Paralllel Algo2", dst2);
cnt++;
if (waitKey(30) >= 0)
break;
}
double parTime = 1000.0*parallel / cnt / (double)CLOCKS_PER_SEC;
double seqTime = 1000.0*sequential / cnt / (double)CLOCKS_PER_SEC;
std::cout << std::stopl << "Average processing time:" << std::stopl
<< "Parallel: " << parTime<< "ms"
<< "\t Sequential: " << seqTime << "ms"
<< "";
std::cout << std::stopl << "Press a Enter to terminate ";
std::cin.get();
}
2 | No.2 Revision |
You could use threads just for run your algoritms but don't expect faster performance because:
Below is simple example, here I'm comparing sequential vs parallel implementation.
I'm showing how to apply 2 different algorithms over same frame, using 2 sequential calls and simple threading. The example below suffering of poor threading implementation because thread construction will introduce hig big overhead.
On my computer, results shows show than sequential way is about 2time faster than simple threading !threading, it depends on background computer load, sequential might be up to 3 time faster!
#include <thread>
// Edge detection: BGR -> grayscale -> Gaussian smoothing -> Canny edges.
void Algo1(const cv::Mat &src, cv::Mat *dst)
{
    const cv::Size blurKernel(7, 7);
    const double blurSigma = 1.5;
    cv::cvtColor(src, *dst, CV_BGR2GRAY);
    cv::GaussianBlur(*dst, *dst, blurKernel, blurSigma, blurSigma);
    cv::Canny(*dst, *dst, 0, 30, 3);
}
// Morphological gradient with a 3x3 rectangular structuring element.
void Algo2(const cv::Mat &src, cv::Mat *dst)
{
    const int radius = 1;
    const cv::Size kernelSize(2 * radius + 1, 2 * radius + 1);
    const cv::Point kernelAnchor(radius, radius);
    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, kernelSize, kernelAnchor);
    cv::morphologyEx(src, *dst, cv::MORPH_GRADIENT, kernel);
}
int main()
{
VideoCapture cap(-1); // open the default camera
if (!cap.isOpened()) // check if we succeeded
return -1;
clock_t parallel = 0, sequential = 0;
clock_t start, stop;
int cnt=0;
for (;;)
{
Mat src,dst1,dst2;
cap >> src; // get a new frame from camera
imshow("src", src);
//Try it with sequential way
start = clock();
Algo1(src, &dst1);
Algo2(src, &dst2);
stop = clock();
sequential += (stop - start);
imshow("Sequential Algo1", dst1);
imshow("Sequential Algo2", dst2);
// try simple parallel processing way
start = clock();
std::thread th1(&Algo1, src, &dst1);
std::thread th2(&Algo2, src, &dst2);
th1.join();
th2.join();
stop = clock();
parallel += (stop - start);
imshow("Paralllel Algo1", dst1);
imshow("Paralllel Algo2", dst2);
cnt++;
if (waitKey(30) >= 0)
break;
}
double parTime = 1000.0*parallel / cnt / (double)CLOCKS_PER_SEC;
double seqTime = 1000.0*sequential / cnt / (double)CLOCKS_PER_SEC;
std::cout << std::stopl << "Average processing time:" << std::stopl
<< "Parallel: " << parTime<< "ms"
<< "\t Sequential: " << seqTime << "ms"
<< "";
std::cout << std::stopl << "Press a Enter to terminate ";
std::cin.get();
}
3 | No.3 Revision |
You could use threads just for run your algoritms but don't expect faster performance because:
Below is simple example, here I'm comparing sequential vs parallel implementation.implementation using a stream from a webcam as input.
I'm showing how to apply 2 different algorithms over same frame, using 2 sequential calls and simple threading. The example below suffering of poor threading implementation because thread construction will introduce big overhead.
On my computer, results show than sequential way is faster than simple threading, it depends on background computer load, sequential might be up to 3 time faster!
#include <thread>
// Edge detection: BGR -> grayscale -> Gaussian smoothing -> Canny edges.
void Algo1(const cv::Mat &src, cv::Mat *dst)
{
    const cv::Size blurKernel(7, 7);
    const double blurSigma = 1.5;
    cv::cvtColor(src, *dst, CV_BGR2GRAY);
    cv::GaussianBlur(*dst, *dst, blurKernel, blurSigma, blurSigma);
    cv::Canny(*dst, *dst, 0, 30, 3);
}
// Morphological gradient with a 3x3 rectangular structuring element.
void Algo2(const cv::Mat &src, cv::Mat *dst)
{
    const int radius = 1;
    const cv::Size kernelSize(2 * radius + 1, 2 * radius + 1);
    const cv::Point kernelAnchor(radius, radius);
    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, kernelSize, kernelAnchor);
    cv::morphologyEx(src, *dst, cv::MORPH_GRADIENT, kernel);
}
int main()
{
VideoCapture cap(-1); // open the default camera
if (!cap.isOpened()) // check if we succeeded
return -1;
clock_t parallel = 0, sequential = 0;
clock_t start, stop;
int cnt=0;
for (;;)
{
Mat src,dst1,dst2;
cap >> src; // get a new frame from camera
imshow("src", src);
//Try it with sequential way
start = clock();
Algo1(src, &dst1);
Algo2(src, &dst2);
stop = clock();
sequential += (stop - start);
imshow("Sequential Algo1", dst1);
imshow("Sequential Algo2", dst2);
// try simple parallel processing way
start = clock();
std::thread th1(&Algo1, src, &dst1);
std::thread th2(&Algo2, src, &dst2);
th1.join();
th2.join();
stop = clock();
parallel += (stop - start);
imshow("Paralllel Algo1", dst1);
imshow("Paralllel Algo2", dst2);
cnt++;
if (waitKey(30) >= 0)
break;
}
double parTime = 1000.0*parallel / cnt / (double)CLOCKS_PER_SEC;
double seqTime = 1000.0*sequential / cnt / (double)CLOCKS_PER_SEC;
std::cout << std::stopl << "Average processing time:" << std::stopl
<< "Parallel: " << parTime<< "ms"
<< "\t Sequential: " << seqTime << "ms"
<< "";
std::cout << std::stopl << "Press a Enter to terminate ";
std::cin.get();
}
4 | No.4 Revision |
You could just use threads just for to run your algoritms algorithms but don't expect faster performance because:
Below is simple example, here I'm comparing sequential vs parallel implementation using a stream from a webcam as input.
I'm showing how to apply 2 different algorithms over same frame, using 2 sequential calls and simple threading. The example below suffering of poor threading implementation because thread construction will introduce big overhead.
On my computer, results show than that the sequential way is faster than simple threading, it depends on background computer load, sequential might be up to 3 time faster!faster !
#include <thread>
// Edge detection: BGR -> grayscale -> Gaussian smoothing -> Canny edges.
void Algo1(const cv::Mat &src, cv::Mat *dst)
{
    const cv::Size blurKernel(7, 7);
    const double blurSigma = 1.5;
    cv::cvtColor(src, *dst, CV_BGR2GRAY);
    cv::GaussianBlur(*dst, *dst, blurKernel, blurSigma, blurSigma);
    cv::Canny(*dst, *dst, 0, 30, 3);
}
// Morphological gradient with a 3x3 rectangular structuring element.
void Algo2(const cv::Mat &src, cv::Mat *dst)
{
    const int radius = 1;
    const cv::Size kernelSize(2 * radius + 1, 2 * radius + 1);
    const cv::Point kernelAnchor(radius, radius);
    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, kernelSize, kernelAnchor);
    cv::morphologyEx(src, *dst, cv::MORPH_GRADIENT, kernel);
}
int main()
{
VideoCapture cap(-1); // open the default camera
if (!cap.isOpened()) // check if we succeeded
return -1;
clock_t parallel = 0, sequential = 0;
clock_t start, stop;
int cnt=0;
for (;;)
{
Mat src,dst1,dst2;
cap >> src; // get a new frame from camera
imshow("src", src);
//Try it with sequential way
start = clock();
Algo1(src, &dst1);
Algo2(src, &dst2);
stop = clock();
sequential += (stop - start);
imshow("Sequential Algo1", dst1);
imshow("Sequential Algo2", dst2);
// try simple parallel processing way
start = clock();
std::thread th1(&Algo1, src, &dst1);
std::thread th2(&Algo2, src, &dst2);
th1.join();
th2.join();
stop = clock();
parallel += (stop - start);
imshow("Paralllel Algo1", dst1);
imshow("Paralllel Algo2", dst2);
cnt++;
if (waitKey(30) >= 0)
break;
}
double parTime = 1000.0*parallel / cnt / (double)CLOCKS_PER_SEC;
double seqTime = 1000.0*sequential / cnt / (double)CLOCKS_PER_SEC;
std::cout << std::stopl << "Average processing time:" << std::stopl
<< "Parallel: " << parTime<< "ms"
<< "\t Sequential: " << seqTime << "ms"
<< "";
std::cout << std::stopl << "Press a Enter to terminate ";
std::cin.get();
}
5 | No.5 Revision |
You could just use threads to run your algorithms but don't expect faster performance because:
Below is simple example, here I'm comparing sequential vs parallel implementation using a stream from a webcam as input.
I'm showing how to apply 2 different algorithms over same frame, using 2 sequential calls and simple threading. The example below suffering of poor threading implementation because thread construction will introduce big overhead.
On my computer, results show that the sequential way is faster than simple threading, it depends on background computer load, sequential might be up to 3 2 time faster !faster.
EDIT: Added measure of treading overhead.. Look at my timing:
Parallel: 16.3ms Sequential: 12.8ms Overhead:3.5ms
Parallel: 8.1ms Sequential: 4.3ms Overhead:4.9ms
3.6ms Sequential: 2.7ms Overhead:0.6ms
the code:
#include <thread>
#include <opencv2/opencv.hpp>
using namespace cv;
// Edge detection: BGR -> grayscale -> Gaussian smoothing -> Canny edges.
void Algo1(const cv::Mat &src, cv::Mat *dst)
{
    const cv::Size blurKernel(7, 7);
    const double blurSigma = 1.5;
    cv::cvtColor(src, *dst, CV_BGR2GRAY);
    cv::GaussianBlur(*dst, *dst, blurKernel, blurSigma, blurSigma);
    cv::Canny(*dst, *dst, 0, 30, 3);
}
// Morphological gradient with a 3x3 rectangular structuring element.
void Algo2(const cv::Mat &src, cv::Mat *dst)
{
    const int radius = 1;
    const cv::Size kernelSize(2 * radius + 1, 2 * radius + 1);
    const cv::Point kernelAnchor(radius, radius);
    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, kernelSize, kernelAnchor);
    cv::morphologyEx(src, *dst, cv::MORPH_GRADIENT, kernel);
}
// Deliberately empty function: calling it (directly or on a fresh thread)
// isolates the pure call/thread-construction overhead for the measurement.
void Test()
{
}
int main()
{
VideoCapture cap(-1); // open the default camera
if (!cap.isOpened()) // check if we succeeded
return -1;
clock_t parallel = 0, sequential = 0, testParallel = 0,testSequential = 0;
clock_t start, stop;
int cnt=0;
for (;;)
{
Mat src,dst1,dst2;
cap >> src; // get a new frame from camera
imshow("src", src);
//Try it with sequential way
start = clock();
Algo1(src, &dst1);
Algo2(src, &dst2);
stop = clock();
sequential += (stop - start);
imshow("Sequential Algo1", dst1);
imshow("Sequential Algo2", dst2);
// try simple parallel processing way
start = clock();
std::thread th1(&Algo1, src, &dst1);
std::thread th2(&Algo2, src, &dst2);
th1.join();
th2.join();
stop = clock();
parallel += (stop - start);
imshow("Paralllel Algo1", dst1);
imshow("Paralllel Algo2", dst2);
// measure threading overhead (2 calls)
int n = 2;
start = clock();
Test();
Test();
stop = clock();
testSequential += (stop - start);
start = clock();
std::thread thTest1(&Test);
std::thread thTest2(&Test);
thTest1.join();
thTest2.join();
stop = clock();
testParallel += (stop - start);
cnt++;
if (waitKey(30) >= 0)
break;
}
double parTime = 1000.0*parallel / cnt / (double)CLOCKS_PER_SEC;
double seqTime = 1000.0*sequential / cnt / (double)CLOCKS_PER_SEC;
double overHead = 1000.0*(testParallel - testSequential) / cnt / (double)CLOCKS_PER_SEC;
std::cout << std::stopl std::endl << "Average processing time:" << std::stopl
time (2 calls):" << std::endl
<< "Parallel: " << parTime<< "ms"
<< "\t Sequential: " << seqTime << "ms"
<< "\t Overhead: " << overHead << "ms"
<< "";
std::cout << std::stopl std::endl << "Press a Enter to terminate ";
std::cin.get();
}
6 | No.6 Revision |
You could just use threads to run your algorithms but don't expect faster performance because:
Below is simple example, here I'm comparing sequential vs parallel implementation using a stream from a webcam as input.
I'm showing how to apply 2 different algorithms over same frame, using 2 sequential calls and simple threading. The example below suffering of poor threading implementation because thread construction will introduce big overhead.
On my computer, results show that the sequential way is faster than simple threading, it depends on background computer load, sequential might be up to 2 time faster.
EDIT: Added measure of treading overhead.. Look at my timing:
Parallel: 16.3ms Sequential: 12.8ms Parallel:16.3ms Sequential:12.8ms Overhead:3.5ms
Parallel: 8.1ms Sequential: 4.3ms Parallel:8.1ms Sequential:4.3ms Overhead:4.9ms
3.6ms Sequential: 2.7ms Parallel:3.6ms Sequential:2.7ms Overhead:0.6ms
the code:
#include <thread>
#include <opencv2/opencv.hpp>
using namespace cv;
// Edge detection: BGR -> grayscale -> Gaussian smoothing -> Canny edges.
void Algo1(const cv::Mat &src, cv::Mat *dst)
{
    const cv::Size blurKernel(7, 7);
    const double blurSigma = 1.5;
    cv::cvtColor(src, *dst, CV_BGR2GRAY);
    cv::GaussianBlur(*dst, *dst, blurKernel, blurSigma, blurSigma);
    cv::Canny(*dst, *dst, 0, 30, 3);
}
// Morphological gradient with a 3x3 rectangular structuring element.
void Algo2(const cv::Mat &src, cv::Mat *dst)
{
    const int radius = 1;
    const cv::Size kernelSize(2 * radius + 1, 2 * radius + 1);
    const cv::Point kernelAnchor(radius, radius);
    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, kernelSize, kernelAnchor);
    cv::morphologyEx(src, *dst, cv::MORPH_GRADIENT, kernel);
}
// Deliberately empty function: calling it (directly or on a fresh thread)
// isolates the pure call/thread-construction overhead for the measurement.
void Test()
{
}
int main()
{
VideoCapture cap(-1); // open the default camera
if (!cap.isOpened()) // check if we succeeded
return -1;
clock_t parallel = 0, sequential = 0, testParallel = 0,testSequential = 0;
clock_t start, stop;
int cnt=0;
for (;;)
{
Mat src,dst1,dst2;
cap >> src; // get a new frame from camera
imshow("src", src);
//Try it with sequential way
start = clock();
Algo1(src, &dst1);
Algo2(src, &dst2);
stop = clock();
sequential += (stop - start);
imshow("Sequential Algo1", dst1);
imshow("Sequential Algo2", dst2);
// try simple parallel processing way
start = clock();
std::thread th1(&Algo1, src, &dst1);
std::thread th2(&Algo2, src, &dst2);
th1.join();
th2.join();
stop = clock();
parallel += (stop - start);
imshow("Paralllel Algo1", dst1);
imshow("Paralllel Algo2", dst2);
// measure threading overhead (2 calls)
int n = 2;
start = clock();
Test();
Test();
stop = clock();
testSequential += (stop - start);
start = clock();
std::thread thTest1(&Test);
std::thread thTest2(&Test);
thTest1.join();
thTest2.join();
stop = clock();
testParallel += (stop - start);
cnt++;
if (waitKey(30) >= 0)
break;
}
double parTime = 1000.0*parallel / cnt / (double)CLOCKS_PER_SEC;
double seqTime = 1000.0*sequential / cnt / (double)CLOCKS_PER_SEC;
double overHead = 1000.0*(testParallel - testSequential) / cnt / (double)CLOCKS_PER_SEC;
std::cout << std::endl << "Average processing time (2 calls):" << std::endl
<< "Parallel: " << parTime<< "ms"
<< "\t Sequential: " << seqTime << "ms"
<< "\t Overhead: " << overHead << "ms"
<< "";
std::cout << std::endl << "Press a Enter to terminate ";
std::cin.get();
}
7 | No.7 Revision |
You could just use threads to run your algorithms but don't expect faster performance because:
Below is simple example, here I'm comparing sequential vs parallel implementation using a stream from a webcam as input.
I'm showing how to apply 2 different algorithms over same frame, using 2 sequential calls and simple threading. The example below suffering of poor threading implementation because thread construction will introduce big overhead.
On my computer, results show that the sequential way is faster than simple threading, it depends on background computer load, sequential might be up to 2 time faster.
EDIT: Added measure of treading overhead.. Look at my timing:timing (win7/64, intel i3):
Parallel:16.3ms Sequential:12.8ms Overhead:3.5ms
Parallel:8.1ms Sequential:4.3ms Overhead:4.9ms
Parallel:3.6ms Sequential:2.7ms Overhead:0.6ms
Parallel:11.65ms Sequential:11.48ms Overhead:0.67ms
Parallel:8.67ms Sequential:8.37ms Overhead:0.69ms
the code:
#include <thread>
#include <opencv2/opencv.hpp>
using namespace cv;
// Edge detection: BGR -> grayscale -> Gaussian smoothing -> Canny edges.
void Algo1(const cv::Mat &src, cv::Mat *dst)
{
    const cv::Size blurKernel(7, 7);
    const double blurSigma = 1.5;
    cv::cvtColor(src, *dst, CV_BGR2GRAY);
    cv::GaussianBlur(*dst, *dst, blurKernel, blurSigma, blurSigma);
    cv::Canny(*dst, *dst, 0, 30, 3);
}
// Morphological gradient with a 3x3 rectangular structuring element.
void Algo2(const cv::Mat &src, cv::Mat *dst)
{
    const int radius = 1;
    const cv::Size kernelSize(2 * radius + 1, 2 * radius + 1);
    const cv::Point kernelAnchor(radius, radius);
    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, kernelSize, kernelAnchor);
    cv::morphologyEx(src, *dst, cv::MORPH_GRADIENT, kernel);
}
// Deliberately empty function: calling it (directly or on a fresh thread)
// isolates the pure call/thread-construction overhead for the measurement.
void Test()
{
}
int main()
{
VideoCapture cap(-1); // open the default camera
if (!cap.isOpened()) // check if we succeeded
return -1;
clock_t parallel = 0, sequential = 0, testParallel = 0,testSequential = 0;
clock_t start, stop;
int cnt=0;
for (;;)
{
Mat src,dst1,dst2;
cap >> src; // get a new frame from camera
imshow("src", src);
//Try it with sequential way
start = clock();
Algo1(src, &dst1);
Algo2(src, &dst2);
stop = clock();
sequential += (stop - start);
imshow("Sequential Algo1", dst1);
imshow("Sequential Algo2", dst2);
// try simple parallel processing way
start = clock();
std::thread th1(&Algo1, src, &dst1);
std::thread th2(&Algo2, src, &dst2);
th1.join();
th2.join();
stop = clock();
parallel += (stop - start);
imshow("Paralllel Algo1", dst1);
imshow("Paralllel Algo2", dst2);
// measure threading overhead (2 calls)
int n = 2;
start = clock();
Test();
Test();
stop = clock();
testSequential += (stop - start);
start = clock();
std::thread thTest1(&Test);
std::thread thTest2(&Test);
thTest1.join();
thTest2.join();
stop = clock();
testParallel += (stop - start);
cnt++;
if (waitKey(30) >= 0)
break;
}
double parTime = 1000.0*parallel / cnt / (double)CLOCKS_PER_SEC;
double seqTime = 1000.0*sequential / cnt / (double)CLOCKS_PER_SEC;
double overHead = 1000.0*(testParallel - testSequential) / cnt / (double)CLOCKS_PER_SEC;
std::cout << std::endl << "Average processing time (2 calls):" << std::endl
<< "Parallel: " << parTime<< "ms"
<< "\t Sequential: " << seqTime << "ms"
<< "\t Overhead: " << overHead << "ms"
<< "";
std::cout << std::endl << "Press a Enter to terminate ";
std::cin.get();
}
8 | No.8 Revision |
You could just use threads to run your algorithms but don't expect faster performance because:
Below is a simple example where I'm comparing a sequential vs a parallel implementation using a stream from a webcam as input.
I'm showing how to apply 2 different algorithms over the same frame, using 2 sequential calls and simple threading. The example below suffers from a poor threading implementation because thread construction introduces big overhead.
On my computer, results show that the sequential way is faster than simple threading; depending on background computer load, sequential might be up to 2 times faster.
EDIT: Added a measure of threading overhead. Look at my timing (win7/64, Intel i3 2x2.53GHz):
Parallel:16.3ms Sequential:12.8ms Overhead:3.5ms
Parallel:8.1ms Sequential:4.3ms Overhead:4.9ms
Parallel:3.6ms Sequential:2.7ms Overhead:0.6ms
Parallel:11.65ms Sequential:11.48ms Overhead:0.67ms
Parallel:8.67ms Sequential:8.37ms Overhead:0.69ms
EDIT2: Considering tuannhtn's answer, it looks interesting to investigate the different results a bit.
For sure advanced parallel programming in IPP improves overall performance, but on an Intel i3 I really can't see any improvement between the sequential and parallel approaches. I suppose the difference is due to different processor architectures.
The Core Duo 2x2.4 and the Intel i3 2x2.53 both have 2 cores, but the Core Duo doesn't have Hyper-Threading and Smart Cache.
When Hyper-Threading is available, some operations share the execution resources automatically in parallel (I/O, cache, bus interface...) across more logical processors. Hyper-Threading and Smart Cache make more efficient use of the available execution resources, boosting the sequential approach.
On the Core Duo, load balancing is left to the developer, so the parallel approach gets a better result.
This can explain why the parallel approach is better on the Core Duo but is close to the sequential approach on the Intel i3. Looking at performance with 640x480 video:
Parallel:8.66ms Sequential:13.47ms Overhead:0.6ms
Parallel:8.67ms Sequential:8.37ms Overhead:0.69ms
the code:
#include <thread>
#include <opencv2/opencv.hpp>
using namespace cv;
// Edge detection: BGR -> grayscale -> Gaussian smoothing -> Canny edges.
void Algo1(const cv::Mat &src, cv::Mat *dst)
{
    const cv::Size blurKernel(7, 7);
    const double blurSigma = 1.5;
    cv::cvtColor(src, *dst, CV_BGR2GRAY);
    cv::GaussianBlur(*dst, *dst, blurKernel, blurSigma, blurSigma);
    cv::Canny(*dst, *dst, 0, 30, 3);
}
// Morphological gradient with a 3x3 rectangular structuring element.
void Algo2(const cv::Mat &src, cv::Mat *dst)
{
    const int radius = 1;
    const cv::Size kernelSize(2 * radius + 1, 2 * radius + 1);
    const cv::Point kernelAnchor(radius, radius);
    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, kernelSize, kernelAnchor);
    cv::morphologyEx(src, *dst, cv::MORPH_GRADIENT, kernel);
}
// Deliberately empty function: calling it (directly or on a fresh thread)
// isolates the pure call/thread-construction overhead for the measurement.
void Test()
{
}
int main()
{
VideoCapture cap(-1); // open the default camera
if (!cap.isOpened()) // check if we succeeded
return -1;
clock_t parallel = 0, sequential = 0, testParallel = 0,testSequential = 0;
clock_t start, stop;
int cnt=0;
for (;;)
{
Mat src,dst1,dst2;
cap >> src; // get a new frame from camera
imshow("src", src);
//Try it with sequential way
start = clock();
Algo1(src, &dst1);
Algo2(src, &dst2);
stop = clock();
sequential += (stop - start);
imshow("Sequential Algo1", dst1);
imshow("Sequential Algo2", dst2);
// try simple parallel processing way
start = clock();
std::thread th1(&Algo1, src, &dst1);
std::thread th2(&Algo2, src, &dst2);
th1.join();
th2.join();
stop = clock();
parallel += (stop - start);
imshow("Paralllel Algo1", dst1);
imshow("Paralllel Algo2", dst2);
// measure threading overhead (2 calls)
int n = 2;
start = clock();
Test();
Test();
stop = clock();
testSequential += (stop - start);
start = clock();
std::thread thTest1(&Test);
std::thread thTest2(&Test);
thTest1.join();
thTest2.join();
stop = clock();
testParallel += (stop - start);
cnt++;
if (waitKey(30) >= 0)
break;
}
double parTime = 1000.0*parallel / cnt / (double)CLOCKS_PER_SEC;
double seqTime = 1000.0*sequential / cnt / (double)CLOCKS_PER_SEC;
double overHead = 1000.0*(testParallel - testSequential) / cnt / (double)CLOCKS_PER_SEC;
std::cout << std::endl << "Average processing time (2 calls):" << std::endl
<< "Parallel: " << parTime<< "ms"
<< "\t Sequential: " << seqTime << "ms"
<< "\t Overhead: " << overHead << "ms"
<< "";
std::cout << std::endl << "Press a Enter to terminate ";
std::cin.get();
}