I'm writing a console application that uses OpenCV and multithreading. I'm testing it on a CPU with 4 physical cores (8 logical cores with Hyper-Threading enabled) and 12 GB of RAM.
Each thread has to execute a function that makes OpenCV calls. In that case, the run time of each thread when several threads execute in parallel is much longer than the run time obtained for a single thread. One would expect the time to be more or less the same regardless of the number of threads, or to increase by about 10%, but the run time of each thread increases with the number of threads used: the more threads, the longer each one takes.
I have tested cv::setNumThreads(8) and cv::setNumThreads(0) with the same result. If the function is replaced by another function that applies some dummy filters of my own to the data, the behaviour is the expected one: all threads finish with the same run time, independently of the number of threads. Do OpenCV functions block the threads, or perform some sequential operations that block threads? The picture below shows the times obtained in the application:
"Time in file No. 3 -->" means the total time, in milliseconds, to process a sequence of images (768).
I attach a sample C++ project to test this behaviour. This application sequentially runs 1 to 8 threads. Does anybody know what is happening? I don't know what else to do...
Thanks.
EDIT.
Here is the code. I cannot attach a ready-to-use project to the post; it is in the link in the previous comment. The test image is Test.jpg. The original used is in BMP format.
#include "stdafx.h"
#include <future>
#include <chrono>
#include "Filter.h"
#include <iostream>
#include <future>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
// Millisecond timer; defined later in this file.
long long Ticks();
// Benchmark workload built from OpenCV calls; always returns 0.
int WithOpencv(cv::Mat img);
// Benchmark workload using hand-written loops over the raw pixel buffer; always returns 0.
int With_OUT_Opencv(cv::Mat img);
// Per-thread task: loads `file`, times repeated runs of the workload and returns the run count.
int TestThreads (char *buffer,std::string file);
// 3x3 box average centred on row `f`, column `c` of a row-major buffer that is
// exactly 1600 elements wide. The nine neighbours are summed (after the usual
// integer promotion) and divided by 9 with integer division.
// Every macro parameter is parenthesised so that expression arguments such as
// `row+1` or `p+offset` expand to the intended index. The caller must keep
// (f, c) at least one element away from every edge of the buffer.
#define Blur3x3(matrix,f,c) (( \
    (matrix)[((f)-1)*1600+((c)-1)] + (matrix)[((f)-1)*1600+(c)] + (matrix)[((f)-1)*1600+((c)+1)] + \
    (matrix)[(f)*1600+((c)-1)]     + (matrix)[(f)*1600+(c)]     + (matrix)[(f)*1600+((c)+1)] + \
    (matrix)[((f)+1)*1600+((c)-1)] + (matrix)[((f)+1)*1600+(c)] + (matrix)[((f)+1)*1600+((c)+1)] \
    )/9)
// Benchmark driver: for each thread count from 1 to 8, launches that many
// concurrent TestThreads tasks, waits for all of them, and prints the elapsed
// wall time divided by the total number of workload runs reported by the tasks.
// Returns 0.
int _tmain(int argc, _TCHAR* argv[])
{
    const int kMaxThreads = 8;

    std::string file="Test.bmp";
    // Capturing `file` by reference is safe here: every future is joined with
    // get() before this function returns.
    auto function = [&](char *buffer){return TestThreads(buffer,file);};
    char *buffers[kMaxThreads];
    std::future<int> frames[kMaxThreads];

    cv::setNumThreads(8);

    // One large scratch buffer per potential thread; it is handed to the task
    // unchanged.
    for (int i=0;i<kMaxThreads;i++) buffers[i] = new char[1000*1024*1024];

    for (int ncores=1;ncores<=kMaxThreads;ncores++)
    {
        int nframes = 0;
        long long t = Ticks();
        for (int i=0;i<ncores;i++) frames[i] = std::async(std::launch::async,function,buffers[i]);
        for (int i=0;i<ncores;i++) nframes += frames[i].get();
        t = Ticks() - t;
        std::cout << "Mean time using " << ncores << " cores is: " << t/nframes << "ms" << std::endl << std::endl;
        // Pause between rounds before starting the next thread count.
        Sleep(2000);
    }

    // The buffers were allocated with new[], so they must be released with delete[].
    for (int i=0;i<kMaxThreads;i++) delete[] buffers[i];
    return 0;
}
// Per-thread benchmark task.
// Loads the image at `file`, runs the selected workload 15 times, prints the
// elapsed time reported by Ticks() for those runs, and returns the number of
// runs (15). `buffer` is accepted for interface compatibility and is not used.
int TestThreads (char *buffer,std::string file)
{
    const int kRuns = 15;

    // NOTE(review): imread is called with its default flags; With_OUT_Opencv
    // indexes the pixel data as a 1600x1200 single-channel buffer - confirm the
    // loaded image matches that layout before switching workloads.
    cv::Mat img = cv::imread(file);

    long long ta = Ticks();
    for (int i=0;i<kRuns;i++) {
        //Uncomment this and comment next line to test without opencv calls. With_OUT_Opencv implements simple filters with direct operations over mat data
        //With_OUT_Opencv(img);
        WithOpencv(img);
    }
    ta = Ticks() - ta;
    std::cout << "Time in file No. 3--> " << ta << std::endl;
    return kRuns;
}
// OpenCV-based benchmark workload.
// Converts the input to 32-bit float, computes
// sqrt(blur3x3(img*img) - blur3x3(img)^2), normalises the result to [0, 1]
// (min-max), rescales it to 8 bits, thresholds it to a binary image and dilates
// it with a 5x5 elliptical structuring element. All results are discarded;
// the function always returns 0.
int WithOpencv(cv::Mat img){
    const int morphRadius = 2;
    const double binaryThreshold = 20;
    const int kernelSide = 2*morphRadius + 1;
    const cv::Size boxSize(3, 3);

    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(kernelSide, kernelSide));

    // Work in floating point so the squared values do not saturate.
    img.convertTo(img, CV_32F);

    cv::Mat boxMean;
    cv::blur(img, boxMean, boxSize);

    cv::Mat boxMeanOfSquares;
    cv::blur(img.mul(img), boxMeanOfSquares, boxSize);

    cv::Mat deviation;
    cv::sqrt(boxMeanOfSquares - boxMean.mul(boxMean), deviation);
    cv::normalize(deviation, deviation, 0.0, 1.0, cv::NORM_MINMAX);
    deviation.convertTo(deviation, CV_8UC1, 255.0);

    cv::Mat binary;
    cv::threshold(deviation, binary, binaryThreshold, 255, cv::THRESH_BINARY);

    // Dilation is skipped only when the structuring radius is zero.
    if (morphRadius == 0){
        return 0;
    }
    cv::dilate(binary, binary, kernel);
    return 0;
}
// Hand-written benchmark workload that avoids OpenCV calls.
// Runs a deliberately crude blur / square / sqrt / threshold sequence directly
// on img.data using two scratch buffers, then discards the results.
// Always returns 0.
// NOTE(review): the loops and the Blur3x3 macro index img.data as a row-major
// 1600x1200 buffer with one byte per pixel - confirm the caller provides that.
int With_OUT_Opencv(cv::Mat img){
    // Value-initialise the scratch buffers: the filter loops skip the one-pixel
    // border, but the border is read later (by Blur3x3 over baux2 and by the
    // final full-range threshold loop), so it must not be left uninitialised.
    unsigned char *baux1 = new unsigned char[1600*1200]();
    unsigned short *baux2 = new unsigned short[1600*1200]();
    unsigned char max=0;
    unsigned char threshold = 177;

    for (int f=1;f<1199;f++) // Bad Blur filters
    {
        for (int c=1; c<1599; c++)
        {
            const int idx = f*1600+c;
            baux1[idx] = Blur3x3(img.data,f,c);
            // The square is stored back into an 8-bit cell and therefore wraps.
            baux1[idx] = baux1[idx] * baux1[idx];
            baux2[idx] = img.data[idx] * img.data[idx];
        }
    }

    for (int f=1;f<1199;f++)
    {
        for (int c=1; c<1599; c++)
        {
            const int idx = f*1600+c;
            // Clamp at zero: a negative argument would make sqrt return NaN,
            // and converting NaN to unsigned char is undefined.
            const int radicand = Blur3x3(baux2,f,c) - baux1[idx];
            baux1[idx] = static_cast<unsigned char>(sqrt(static_cast<double>(radicand > 0 ? radicand : 0)));
            if (baux1[idx] > max) max = baux1[idx];
        }
    }

    // Scale the fixed threshold by the largest value found above.
    threshold = static_cast<unsigned char>(threshold * (static_cast<float>(max)/255.0)); // Bad Norm/Bin

    for (int i=0;i<1600*1200;i++)
    {
        baux1[i] = (baux1[i]>threshold) ? 1 : 0;
    }

    delete []baux1;
    delete []baux2;
    return 0;
}
long long Ticks()
{
static long long last = 0;
static unsigned ticksPerMS = 0;
LARGE_INTEGER largo;
if (last==0)
{
QueryPerformanceFrequency(&largo);
ticksPerMS = (unsigned)(largo.QuadPart/1000);
QueryPerformanceCounter(&largo);
last = largo.QuadPart;
return 0;
}
QueryPerformanceCounter(&largo);
return (largo.QuadPart-last)/ticksPerMS;
}
And this is the OpenCV configuration that is used. I have also tested with the prebuilt version (and with other versions), with the same result.
General configuration for OpenCV 2.4.9 =====================================
Version control: unknown
Platform:
Host: Windows 6.1 AMD64
CMake: 3.3.0-rc4
CMake generator: Visual Studio 11 2012 Win64
CMake build tool: c:/Windows/Microsoft.NET/Framework/v4.0.30319/MSBuild.exe
MSVC: 1700
C/C++:
Built as dynamic libs?: YES
C++ Compiler: C:/Program Files (x86)/Microsoft Visual Studio 11.0/VC/bin/x86_amd64/cl.exe (ver 17.0.50727.1)
C++ flags (Release): /DWIN32 /D_WINDOWS /W4 /GR /EHa /D _CRT_SECURE_NO_DEPRECATE /D _CRT_NONSTDC_NO_DEPRECATE /D _SCL_SECUR
E_NO_WARNINGS /Gy /bigobj /Oi /wd4251 /MD /O2 /Ob2 /D NDEBUG /Zi
C++ flags (Debug): /DWIN32 /D_WINDOWS /W4 /GR /EHa /D _CRT_SECURE_NO_DEPRECATE /D _CRT_NONSTDC_NO_DEPRECATE /D _SCL_SECUR
E_NO_WARNINGS /Gy /bigobj /Oi /wd4251 /D_DEBUG /MDd /Zi /Ob0 /Od /RTC1
C Compiler: C:/Program Files (x86)/Microsoft Visual Studio 11.0/VC/bin/x86_amd64/cl.exe
C flags (Release): /DWIN32 /D_WINDOWS /W3 /D _CRT_SECURE_NO_DEPRECATE /D _CRT_NONSTDC_NO_DEPRECATE /D _SCL_SECURE_NO_WARN
INGS /Gy /bigobj /Oi /MD /O2 /Ob2 /D NDEBUG /Zi
C flags (Debug): /DWIN32 /D_WINDOWS /W3 /D _CRT_SECURE_NO_DEPRECATE /D _CRT_NONSTDC_NO_DEPRECATE /D _SCL_SECURE_NO_WARN
INGS /Gy /bigobj /Oi /D_DEBUG /MDd /Zi /Ob0 /Od /RTC1
Linker flags (Release): /machine:x64 /INCREMENTAL:NO /debug
Linker flags (Debug): /machine:x64 /debug /INCREMENTAL
Precompiled headers: YES
OpenCV modules:
To be built: core flann imgproc highgui features2d calib3d ml video legacy objdetect photo gpu ocl nonfree contrib s
titching superres ts videostab
Disabled: world
Disabled by dependency: -
Unavailable: androidcamera dynamicuda java python viz
Windows RT support: NO
GUI:
QT: NO
Win32 UI: NO
OpenGL support: NO
VTK support: NO
Media I/O:
ZLib: build (ver 1.2.7)
JPEG: build (ver 62)
PNG: build (ver 1.5.12)
TIFF: build (ver 42 - 4.0.2)
JPEG 2000: build (ver 1.900.1)
OpenEXR: build (ver 1.7.1)
Video I/O:
Video for Windows: YES
DC1394 1.x: NO
DC1394 2.x: NO
FFMPEG: YES (prebuilt binaries)
codec: YES (ver 55.18.102)
format: YES (ver 55.12.100)
util: YES (ver 52.38.100)
swscale: YES (ver 2.3.100)
gentoo-style: YES
OpenNI: NO
OpenNI PrimeSensor Modules: NO
PvAPI: NO
GigEVisionSDK: NO
DirectShow: YES
Media Foundation: NO
XIMEA: NO
Intel PerC: NO
Other third-party libraries:
Use IPP: NO
Use Eigen: YES (ver 3.2.5)
Use TBB: YES (ver 4.3 interface 8006)
Use OpenMP: NO
Use GCD NO
Use Concurrency NO
Use C=: NO
Use Cuda: YES (ver 7.0)
Use OpenCL: YES
NVIDIA CUDA
Use CUFFT: YES
Use CUBLAS: YES
USE NVCUVID: NO
NVIDIA GPU arch: 30 35 52
NVIDIA PTX archs:
Use fast math: YES
OpenCL:
Version: dynamic
Include path: C:/Proyectos/Libs/OpenCV-2.4.9/opencv/sources/3rdparty/include/opencl/1.2
Use AMD FFT: NO
Use AMD BLAS: NO
Python:
Interpreter: NO
Java:
ant: NO
JNI: NO
Java tests: NO
Tests and samples:
Tests: YES
Performance tests: YES
C/C++ Examples: NO
Install path: C:/Proyectos/Libs/OpenCV-2.4.9/opencv/Builds/install
cvconfig.h is in: C:/Proyectos/Libs/OpenCV-2.4.9/opencv/Builds
-----------------------------------------------------------------