Ask Your Question

Farhad's profile - activity

2019-01-11 07:09:34 -0600 received badge  Popular Question (source)
2014-01-03 15:49:15 -0600 received badge  Supporter (source)
2014-01-03 15:49:02 -0600 received badge  Scholar (source)
2014-01-03 11:46:50 -0600 commented answer using GPU module with own code

Thank you, that solved my problem.

2014-01-03 03:27:55 -0600 asked a question using GPU module with own code

I am new to CUDA programming and am trying to use OpenCV's GPU module together with my own CUDA code. I am having trouble getting it to work, so I was wondering whether anyone here could point out what I am doing wrong.

I have made a very simple toy problem.

----------------------------------
// main.cpp 

#include <iostream>
#include <fstream>


#include "opencv2/highgui/highgui.hpp"
#include "opencv2/gpu/gpu.hpp"

#include "test.h"

using namespace std;
using namespace cv;
using namespace cv::gpu;

int main()
{
    setDevice(0);

    Mat image = imread("./testset/image_0001.png");
    Mat bw_image(image.size(), CV_32FC1); 
    cvtColor(image, bw_image, CV_RGB2GRAY);
    GpuMat  d_image(bw_image);

    GpuMat  d_image_result(d_image.size(), d_image.type() );


    test_func(d_image, d_image_result);

    Mat mmm;
    d_image_result.download(mmm);

  return 0;
}


-----------------------------------------------------------
// test.h

#ifndef __TEST__
#define __TEST__

#ifndef SKIP_INCLUDES
#include <vector>
#include <memory>
#include <iosfwd>
#endif

#include "opencv2/core/gpumat.hpp"

using namespace std;
using namespace cv;
using namespace cv::gpu;

void do_test(PtrStepSzb src,PtrStepSzb dst);

CV_EXPORTS void test_func(const GpuMat& src, GpuMat& dst)
{
  do_test(src, dst);
}

#endif /* __TEST__ */

-----------------------------------------------------------------------
// test.cu

#include "opencv2/core/cuda_devptrs.hpp"
#include "opencv2/gpu/device/common.hpp"

using namespace std;
using namespace cv;
using namespace cv::gpu;

// Element-wise copy of src into dst, one thread per pixel.
//   src - input view; elements are accessed as float.
//   dst - output view; elements are accessed as float.
// Expects a 2D launch (x covers columns, y covers rows). Threads that fall
// outside either image do nothing, so the grid may overshoot the image size.
__global__ void do_test_kernel(PtrStepSz<float> src, PtrStepSz<float> dst)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Check against both views: guarding on dst alone would read past the
    // end of src whenever src is smaller than dst.
    if (x >= dst.cols || y >= dst.rows || x >= src.cols || y >= src.rows)
        return;

    dst.ptr(y)[x] = src.ptr(y)[x];
}

// Host-side launcher for do_test_kernel: copies src into dst on the device
// and blocks until the kernel has finished.
//   src - input view (float elements).
//   dst - output view (float elements); its size determines the launch grid.
// Launch and execution errors are reported through cudaSafeCall.
void    do_test(const PtrStepSz<float>& src, PtrStepSz<float>& dst)
{
    // A grid with a zero dimension is an invalid launch configuration, so an
    // empty destination is treated as "nothing to do".
    if (dst.cols <= 0 || dst.rows <= 0)
        return;

    // 32x8 threads per block; the grid is rounded up so that it covers the
    // whole destination image (the kernel bounds-checks the overshoot).
    const dim3 block(32, 8);
    const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

    do_test_kernel<<<grid, block>>>(src, dst);

    // Catches launch-configuration errors.
    cudaSafeCall( cudaGetLastError() );

    // Catches errors raised while the kernel was executing.
    cudaSafeCall( cudaDeviceSynchronize() );
}

-------------------------------------------------------------

Running the code above results in the following runtime error:

OpenCV Error: Gpu API call (unspecified launch failure) in caller, file /.../opencv-2.4.7/modules/gpu/src/cuda/matrix_reductions.cu, line 437 terminate called after throwing an instance of 'cv::Exception' what(): /.../opencv-2.4.7/modules/gpu/src/cuda/matrix_reductions.cu:437: error: (-217) unspecified launch failure in function caller

Can anyone please tell me what the problem with my code is?