Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

The result obtained by cuda::dft is different from cv::dft

I'm trying to speed up cv::dft by using the GPU version, but I find that the result obtained by cv::cuda::dft is different from the result of cv::dft.

Here's the code:

CPU version:

Mat t = imread(...) // read the src image
Mat f, dst;
Mat plane_h[] = { Mat_<float>(t), Mat::zeros(t.size(),CV_32F) };
merge(plane_h, 2, t);
merge(plane_h, 2, f);
cv::dft(t, f, DFT_SCALE | DFT_COMPLEX_OUTPUT);
cv::dft(f, dst, DFT_INVERSE | DFT_REAL_OUTPUT);

GPU version:

Mat t = imread(...); // read the src image
cuda::GpuMat t_dev, f_dev, dst_dev;
Mat dst;
t_dev.upload(t);
cuda::GpuMat plane_h[] = { t_dev, GPU::GpuMat(t_dev.size(),CV_32FC1) };
cuda::merge(plane_h, 2, t_dev);
cuda::merge(plane_h, 2, f_dev);
cuda::dft(t_dev, f_dev, t_dev.size(), DFT_SCALE);
cuda::dft(f_dev, dst_dev, t_dev.size(), DFT_COMPLEX_INPUT | DFT_REAL_OUTPUT);
dst_dev.download(dst);

In the CPU version, 'dst' is equal to 't', while in the GPU version, 'dst' is totally wrong.

I also found that 'f_dev' in the GPU version is equal to 'f' in the CPU version.

The result obtained by cuda::dft is different from cv::dft

I'm trying to speed up cv::dft by using the GPU version, but I find that the result obtained by cv::cuda::dft is different from the result of cv::dft.

Here's the code:

CPU version:

Mat t = imread(...) // read the src image
Mat f, dst;
Mat plane_h[] = { Mat_<float>(t), Mat::zeros(t.size(),CV_32F) };
merge(plane_h, 2, t);
merge(plane_h, 2, f);
cv::dft(t, f, DFT_SCALE | DFT_COMPLEX_OUTPUT);
cv::dft(f, dst, DFT_INVERSE | DFT_REAL_OUTPUT);

GPU version:

Mat t = imread(...); // read the src image
cuda::GpuMat t_dev, f_dev, dst_dev;
Mat dst;
t_dev.upload(t);
cuda::GpuMat plane_h[] = { t_dev, cuda::GpuMat(t_dev.size(),CV_32FC1) };
cuda::merge(plane_h, 2, t_dev);
cuda::merge(plane_h, 2, f_dev);
cuda::dft(t_dev, f_dev, t_dev.size(), DFT_SCALE);
cuda::dft(f_dev, dst_dev, t_dev.size(), DFT_COMPLEX_INPUT | DFT_REAL_OUTPUT);
dst_dev.download(dst);

In the CPU version, 'dst' is equal to 't', while in the GPU version, 'dst' is totally wrong.

I also found that 'f_dev' in the GPU version is equal to 'f' in the CPU version.