Revision history - OpenCV Q&A Forum

GPU Code Not Working Question

Hi,

(This is a continuation from the post "Pseudocode for custom GPU computation")

Following is my GPU opencv code. I tried 2 things - 1) to just set all pixels of the output to zero, 2) return a monochrome image based on a threshold on the pixel values of the input image. But for both cases when I run it, I dont get the result I want, I get some grayscale image, which is just peppered noise. My code below:

//The size of the video frame is 480 * 640

//funcam.cpp

using namespace std;

using namespace cv;

using namespace cv::gpu;

void callKernel(const GpuMat& src, const GpuMat& dst)

{

uchar* p = src.data;

uchar* p2 = dst.data;

func(p, p2, src.step, src.cols, src.rows, dst.step);

}

int main(int, char**) {

VideoCapture cap(0);

if(!cap.isOpened()) return -1;

int frameH = (int) cap.get(CV_CAP_PROP_FRAME_HEIGHT);

int frameW = (int) cap.get(CV_CAP_PROP_FRAME_WIDTH);

cout << frameH << " " << frameW << endl;

CvSize size = cvSize(frameH, frameW);

Mat frame;

Mat input;

Mat output;

GpuMat d_frame;

GpuMat d_output;

for(;;)

{

  cap >> frame;

  if (frame.empty())

     break;

  //convert to grayscale

  cvtColor(frame, input, CV_BGR2GRAY);

  // memory Copy from Host to Device

  d_frame.upload(input);

  // Call CUDA kernel

  d_output.create(size, CV_8UC1);

  callKernel(d_frame, d_output);

  // memory Copy from Device to Host

  d_output.download(output);

  imshow("output", output);

  if(waitKey(30) >= 0)

     break;

}

return 0;

}

//funcam_cuda.cu

__global__ void funcKernel(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep)

{

int rowInd = blockIdx.y * blockDim.y + threadIdx.y;

int colInd = blockIdx.x * blockDim.x + threadIdx.x;

uchar* rowsrcPtr = srcptr + rowInd*step;

uchar* rowdstPtr = dstptr + rowInd*dststep;

uchar pixVal = rowsrcPtr[colInd];

// rowdstPtr[colInd] = (pixVal > 60 ? 255 : 0);

rowdstPtr[colInd] = 0;

}

extern "C"

void func(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep) {

dim3 grDim (16, 12);

dim3 blDim (40, 40);

funcKernel<<< grDim, blDim >>>(srcptr, dstptr, step, cols, rows, dststep);

cudaThreadSynchronize(); //Is this reqd?

}

GPU Code Not Working Question

Hi,

(This is a continuation from the post "Pseudocode for custom GPU ~~computation")~~computation")

Following is my GPU opencv code. I tried 2 things - 1) to just set all pixels of the output to zero, 2) return a monochrome image based on a threshold on the pixel values of the input image. But for both cases when I run it, I dont get the result I want, I get some grayscale image, which is just peppered noise. My code below:

//The size of the video frame is 480 * 640 //funcam.cpp 640 //funcam.cpp using namespace std; std; using namespace cv; cv; using namespace cv::gpu; cv::gpu; void callKernel(const GpuMat& src, const GpuMat& dst) { dst) { uchar* p = src.data; src.data; uchar* p2 = dst.data; dst.data; func(p, p2, src.step, src.cols, src.rows, dst.step); } dst.step); } int main(int, char**) { { VideoCapture cap(0); cap(0); if(!cap.isOpened()) return -1; -1; int frameH = (int) cap.get(CV_CAP_PROP_FRAME_HEIGHT); cap.get(CV_CAP_PROP_FRAME_HEIGHT); int frameW = (int) cap.get(CV_CAP_PROP_FRAME_WIDTH); cap.get(CV_CAP_PROP_FRAME_WIDTH); cout << frameH << " " << frameW << endl; endl; CvSize size = cvSize(frameH, frameW); frameW); Mat frame; frame; Mat input; input; Mat output; output; GpuMat d_frame; d_frame; GpuMat d_output; for(;;) { d_output; for(;;) { cap >> frame; if (frame.empty()) break; //convert to grayscale cvtColor(frame, input, CV_BGR2GRAY); // memory Copy from Host to Device d_frame.upload(input); // Call CUDA kernel d_output.create(size, CV_8UC1); callKernel(d_frame, d_output); // memory Copy from Device to Host d_output.download(output); imshow("output", output); if(waitKey(30) >= 0) break; } } return 0; } //funcam_cuda.cu } //funcam_cuda.cu __global__ void funcKernel(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep) { { int rowInd = blockIdx.y * blockDim.y + threadIdx.y; threadIdx.y; int colInd = blockIdx.x * blockDim.x + threadIdx.x; threadIdx.x; uchar* rowsrcPtr = srcptr + rowInd*step; rowInd*step; uchar* rowdstPtr = dstptr + rowInd*dststep; rowInd*dststep; uchar pixVal = rowsrcPtr[colInd]; rowsrcPtr[colInd]; // rowdstPtr[colInd] = (pixVal > 60 ? 255 : 0); 0); rowdstPtr[colInd] = 0; } 0; } extern "C" "C" void func(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep) { { dim3 grDim (16, 12); 12); dim3 blDim (40, 40); 40); funcKernel<<< grDim, blDim >>>(srcptr, dstptr, step, cols, rows, dststep); dststep); cudaThreadSynchronize(); //Is this reqd? reqd? }

GPU Code Not Working Question

Hi,

(This is a continuation from the post "Pseudocode for custom GPU computation")

Following is my GPU opencv code. I tried 2 things - 1) to just set all pixels of the output to zero, 2) return a monochrome image based on a threshold on the pixel values of the input image. But for both cases when I run it, I dont get the result I want, I get some grayscale image, which is just peppered noise. My code below:


//The size of the video frame is 480 * 640
 //funcam.cpp

using namespace std;
 using namespace cv;
 using namespace cv::gpu;
 
void callKernel(const GpuMat& src, const GpuMat& dst)
 {
    uchar* p = src.data;
    uchar* p2 = dst.data;
    func(p, p2, src.step, src.cols, src.rows, dst.step);
 }
 
int main(int, char**) 
{
    VideoCapture cap(0);
    if(!cap.isOpened()) return -1;

    int frameH    = (int) cap.get(CV_CAP_PROP_FRAME_HEIGHT);
    int frameW    = (int) cap.get(CV_CAP_PROP_FRAME_WIDTH);
    cout << frameH << " " << frameW << endl;

   CvSize size = cvSize(frameH, frameW);
    Mat frame;
    Mat input;
    Mat output;
    GpuMat d_frame;
    GpuMat d_output;

   for(;;)
    {
       cap >> frame;
       if (frame.empty())
          break;

      //convert to grayscale
       cvtColor(frame, input, CV_BGR2GRAY);

      // memory Copy from Host to Device
       d_frame.upload(input);

      // Call CUDA kernel
       d_output.create(size, CV_8UC1);
       callKernel(d_frame, d_output);

      // memory Copy from Device to Host
       d_output.download(output);

      imshow("output", output);
       if(waitKey(30) >= 0)
          break;
   }
     return 0;
 }

 //funcam_cuda.cu
 __global__ void funcKernel(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep) 
 {
    int rowInd = blockIdx.y * blockDim.y + threadIdx.y;
    int colInd = blockIdx.x * blockDim.x + threadIdx.x;
    uchar* rowsrcPtr = srcptr + rowInd*step;
    uchar* rowdstPtr = dstptr + rowInd*dststep;
    uchar pixVal = rowsrcPtr[colInd];
 // rowdstPtr[colInd] = (pixVal > 60 ? 255 : 0);
    rowdstPtr[colInd] = 0;
 }

extern "C"
 void func(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep) dststep)
{
 dim3 grDim (16, 12);
 dim3 blDim (40, 40);
 funcKernel<<< grDim, blDim >>>(srcptr, dstptr, step, cols, rows, dststep);
 cudaThreadSynchronize(); //Is this reqd?

}}

GPU Code Not Working Question

Hi,

(This is a continuation from the post "Pseudocode for custom GPU computation")

Following is my GPU opencv code. I tried 2 things - 1) to just set all pixels of the output to zero, 2) return a monochrome image based on a threshold on the pixel values of the input image. But for both cases when I run it, I dont get the result I want, I get some grayscale image, which is just peppered noise. My code below:


//The size of the video frame is 480 * 640
//funcam.cpp

using namespace std;
using namespace cv;
using namespace cv::gpu;

void callKernel(const GpuMat& src, const GpuMat& dst)
{
   uchar* p = src.data;
   uchar* p2 = dst.data;
   func(p, p2, src.step, src.cols, src.rows, dst.step);
}

int main(int, char**) 
{
   VideoCapture cap(0);
   if(!cap.isOpened()) return -1;

   int frameH    = (int) cap.get(CV_CAP_PROP_FRAME_HEIGHT);
   int frameW    = (int) cap.get(CV_CAP_PROP_FRAME_WIDTH);
   cout << frameH << " " << frameW << endl;

   CvSize size = cvSize(frameH, frameW);
   Mat frame;
   Mat input;
   Mat output;
   GpuMat d_frame;
   GpuMat d_output;

   for(;;)
   {
      cap >> frame;
      if (frame.empty())
         break;

      //convert to grayscale
      cvtColor(frame, input, CV_BGR2GRAY);

      // memory Copy from Host to Device
      d_frame.upload(input);

      // Call CUDA kernel
      d_output.create(size, CV_8UC1);
      callKernel(d_frame, d_output);

      // memory Copy from Device to Host
      d_output.download(output);

      imshow("output", output);
      if(waitKey(30) >= 0)
         break;
   }
    return 0;
}

//funcam_cuda.cu
__global__ void funcKernel(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep) 
{
   int rowInd = blockIdx.y * blockDim.y + threadIdx.y;
   int colInd = blockIdx.x * blockDim.x + threadIdx.x;
   uchar* rowsrcPtr = srcptr + rowInd*step;
   uchar* rowdstPtr = dstptr + rowInd*dststep;
   uchar pixVal = rowsrcPtr[colInd];
// rowdstPtr[colInd] = (pixVal > 60 ? 255 : 0);
   rowdstPtr[colInd] = 0;
}

extern "C"
void func(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep)
{
dim3 grDim (16, 12);
dim3 blDim (40, 40);
funcKernel<<< grDim, blDim >>>(srcptr, dstptr, step, cols, rows, dststep);
cudaThreadSynchronize(); //Is this reqd?
}

Revision history [back]

GPU Code Not Working Question

GPU Code Not Working Question

GPU Code Not Working Question

GPU Code Not Working Question