CUDA initialization delay for successive methods

asked 2016-02-01 03:51:32 -0500

pam gravatar image

Hello. I'm working on a simple program to evaluate the performance of some cv::cuda methods. I am using OpenCV 3.1 on Ubuntu 15 with a GeForce 770. (CUDA 7.5)

There's something I really don't understand about the initialization of the CUDA environment and the impact of that initialization on the first call of cv::cuda methods.

I initialize my program with a cv::cuda::getDevice() then I test 2 methods : - cv::cuda::resize() (factor 0.5) - and cv::cuda::meanStdDev.

Initialization takes 400ms. Then resizing takes 1 ms, that's OK. But meanStdDev: 476ms!!! If I run two successive meanStdDev calls, the second one is much faster (3ms).

I really don't understand why the initialization has an effect on resize() but not on meanStdDev()...

Thanks for your help.

Pierre.

#include <opencv2/opencv.hpp>
#include <opencv2/cudaimgproc.hpp>
#include "opencv2/cudawarping.hpp"
#include "opencv2/cudaarithm.hpp"

using namespace std;

int main() 
{
    // ---- CUDA context creation --------------------------------------------
    // The first CUDA runtime call of the process creates the CUDA context,
    // which is why this step is expensive (~400 ms).  NOTE(review): each
    // cv::cuda module (cudawarping, cudaarithm, ...) still performs its own
    // lazy one-time initialization on first use, so the first call into a
    // module it has not touched yet (e.g. meanStdDev from cudaarithm) pays
    // an extra one-time cost even after getDevice() — do a warm-up call
    // before timing if you want steady-state numbers.
    double t_init_cuda = (double)cv::getTickCount();
    int CudaDevice;
    if(cv::cuda::getCudaEnabledDeviceCount()==0)
    {
        cerr<<endl<<"ERROR: NO CudaEnabledDevice"<<endl;
        exit(2);   // exit code 2: no CUDA-capable device available
    }
    else
    {
        CudaDevice = cv::cuda::getDevice();
    }
    t_init_cuda = ((double)cv::getTickCount() - t_init_cuda)/cv::getTickFrequency() * 1000;
    // Print the selected device id as well (CudaDevice was previously unused).
    cout<<endl<<"T_INIT_CUDA="<<t_init_cuda<<"ms (device "<<CudaDevice<<")\n";

    // Load the test image as 8-bit single-channel grayscale (flag 0).
    cv::Mat src = cv::imread("/home/my_image.jpg", 0);
    if (src.empty()) exit(1);   // empty() is the canonical "load failed" check

    // ---- cv::cuda::resize -------------------------------------------------
    cv::cuda::GpuMat d_src(src);   // host -> device upload (deliberately not timed)
    cv::cuda::GpuMat d_dst;
    const double factor = 0.5;     // downscale by half in each dimension

    double t_gpu_resize = (double)cv::getTickCount();
    // cv::INTER_AREA is the C++ enumerator; the legacy C macro CV_INTER_AREA
    // is deprecated in OpenCV 3.x.
    cv::cuda::resize(d_src, d_dst,
                     cv::Size((int)(d_src.cols * factor), (int)(d_src.rows * factor)),
                     0, 0, cv::INTER_AREA);
    t_gpu_resize = ((double)cv::getTickCount() - t_gpu_resize)/cv::getTickFrequency() * 1000;
    cout<<endl<<"T_GPU_RESIZE="<<t_gpu_resize<<"ms\n";

    // ---- cv::cuda::meanStdDev ---------------------------------------------
    // Bug fix: split() used to sit inside the timed region, so the value
    // labelled "T_meanStdDev_GPU" really measured split + meanStdDev.
    // Run the split (and its own first-call initialization) before starting
    // the timer.  Since src was loaded grayscale, d_dst is already
    // single-channel and the split only yields that one channel.
    cv::Scalar mean, stddev;
    std::vector<cv::cuda::GpuMat> d_dst_split;
    cv::cuda::split(d_dst, d_dst_split);

    double t_meanstddev = (double)cv::getTickCount();
    cv::cuda::meanStdDev(d_dst_split[0], mean, stddev);
    t_meanstddev = ((double)cv::getTickCount() - t_meanstddev)/cv::getTickFrequency() * 1000.0;
    cout<<endl<<"T_meanStdDev_GPU="<<t_meanstddev<<"ms\n";

    return 0;
}
edit retag flag offensive close merge delete