GPU cv::cuda::scaleAdd is slower than CPU cv::scaleAdd

asked 2016-09-05 23:38:07 -0600

alvarouc gravatar image

Hi, I used the following code to compare the execution time of the CPU and GPU versions of scaleAdd. I am running Ubuntu 16.04 with CUDA 8.0 on a laptop with a 740M card and a 5th-generation Core i7.

However, the result I get is that the CPU version is always faster.

Could you execute the code and provide me your results with machine configuration?

I compile with

OPENCV_LIBS=`pkg-config opencv --cflags --libs`
g++ -c scaleAdd.cpp -Wall -Wextra -m64 -lm  -L$(OPENCV_LIBPATH) $(OPENCV_LIBS)

image description

#include <opencv2/core.hpp>
#include "opencv2/cudaarithm.hpp"
#include <iostream>
using namespace std;

class Timer
  double tick;
  void tic()
    tick = (double)cv::getTickCount();
  double toc()
    return ((double)cv::getTickCount() - tick)/cv::getTickFrequency();

int main()
  size_t LOOPS = 100;
  size_t MAX_SIZE=6000;
  Timer clock;
  float filter= 1;
  cv::cuda::GpuMat buffer;
  cv::cuda::GpuMat result;

  cout<< "Time in ms" << endl;
  cout<< "N,BW(GBS),GPU,CPU"<< endl;

  for (size_t N=200; N<MAX_SIZE; N*=1.1)
      //setup buffer
      cv::Mat h_buffer = cv::Mat::ones(N, N, CV_32FC1);
      cv::Mat h_result = cv::Mat::ones(N, N, CV_32FC1);

      double bw = N*N*32.0/8.0/clock.toc()/1e9;

      for (size_t i=0; i<LOOPS; i++)
    cv::cuda::scaleAdd(buffer, filter, result, result);
      double gpu = clock.toc()/(double)LOOPS;

      for (size_t i=0; i<LOOPS; i++)
    cv::scaleAdd(h_buffer, filter, h_result, h_result);
      double cpu = clock.toc()/(double)LOOPS;

      cout<< N << "," << bw << "," << gpu << "," << cpu << endl;

edit retag flag offensive close merge delete