CUDA initialization delay for successive methods
Hello. I'm working on a simple program to evaluate the performance of some cv::cuda methods. I am using OpenCV 3.1 on Ubuntu 15 with a GeForce 770 (CUDA 7.5).
There is something I really don't understand about the initialization of the CUDA environment and the impact of that initialization on the first call of cv::cuda methods.
I initialize my program with cv::cuda::getDevice(), then I test two methods: cv::cuda::resize() (factor 0.5) and cv::cuda::meanStdDev().
Initialization takes 400 ms. Resizing then takes 1 ms, which is fine. But meanStdDev takes 476 ms! If I run two successive meanStdDev calls, the second one is much faster (3 ms) - see the small sketch after the code below.
I really don't understand why the initialization has an effect on resize() but not on meanStdDev()...
Thanks for your help.
Pierre.
#include <iostream>

#include <opencv2/opencv.hpp>
#include <opencv2/cudaimgproc.hpp>
#include <opencv2/cudawarping.hpp>
#include <opencv2/cudaarithm.hpp>

using namespace std;

int main()
{
    // CUDA initialization: first call into the cv::cuda module
    double t_init_cuda = (double)cv::getTickCount();
    int CudaDevice;
    if (cv::cuda::getCudaEnabledDeviceCount() == 0)
    {
        cerr << endl << "ERROR: NO CudaEnabledDevice" << endl;
        exit(2);
    }
    else
    {
        CudaDevice = cv::cuda::getDevice();
    }
    t_init_cuda = ((double)cv::getTickCount() - t_init_cuda) / cv::getTickFrequency() * 1000;
    cout << endl << "T_INIT_CUDA=" << t_init_cuda << "ms\n";

    cv::Mat src = cv::imread("/home/my_image.jpg", 0);   // load as grayscale
    if (!src.data) exit(1);

    // cv::cuda::resize (the upload to the GPU is done outside the timed block)
    cv::cuda::GpuMat d_src(src);
    cv::cuda::GpuMat d_dst;
    double factor = 0.5;

    double t_gpu_resize = (double)cv::getTickCount();
    cv::cuda::resize(d_src, d_dst,
                     cv::Size((int)(d_src.cols * factor), (int)(d_src.rows * factor)),
                     0, 0, cv::INTER_AREA);
    t_gpu_resize = ((double)cv::getTickCount() - t_gpu_resize) / cv::getTickFrequency() * 1000;
    cout << endl << "T_GPU_RESIZE=" << t_gpu_resize << "ms\n";

    // cv::cuda::meanStdDev (the timed block also includes the cv::cuda::split call)
    double t_meanstddev = (double)cv::getTickCount();
    cv::Scalar mean, stddev;
    std::vector<cv::cuda::GpuMat> d_dst_split;
    cv::cuda::split(d_dst, d_dst_split);
    cv::cuda::meanStdDev(d_dst_split[0], mean, stddev);
    t_meanstddev = ((double)cv::getTickCount() - t_meanstddev) / cv::getTickFrequency() * 1000.0;
    cout << endl << "T_meanStdDev_GPU=" << t_meanstddev << "ms\n";

    return 0;
}
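For reference, the "two successive meanStdDev" test I mention above was essentially the following (a minimal sketch reusing d_dst_split, mean and stddev from the program above; not the exact code I ran):

// Time two consecutive meanStdDev calls on the same single-channel GpuMat.
// On my machine the first call takes ~476 ms and the second only ~3 ms.
for (int i = 0; i < 2; ++i)
{
    double t = (double)cv::getTickCount();
    cv::cuda::meanStdDev(d_dst_split[0], mean, stddev);
    t = ((double)cv::getTickCount() - t) / cv::getTickFrequency() * 1000.0;
    cout << "meanStdDev call " << i << ": " << t << "ms\n";
}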