Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

GPU Code Not Working Question

Hi,

(This is a continuation from the post "Pseudocode for custom GPU computation")

Following is my GPU opencv code. I tried 2 things - 1) to just set all pixels of the output to zero, 2) return a monochrome image based on a threshold on the pixel values of the input image. But for both cases when I run it, I dont get the result I want, I get some grayscale image, which is just peppered noise. My code below:

//The size of the video frame is 480 * 640

//funcam.cpp

using namespace std;

using namespace cv;

using namespace cv::gpu;

void callKernel(const GpuMat& src, const GpuMat& dst)

{

uchar* p = src.data;

uchar* p2 = dst.data;

func(p, p2, src.step, src.cols, src.rows, dst.step);

}

int main(int, char**) {

VideoCapture cap(0);

if(!cap.isOpened()) return -1;

int frameH = (int) cap.get(CV_CAP_PROP_FRAME_HEIGHT);

int frameW = (int) cap.get(CV_CAP_PROP_FRAME_WIDTH);

cout << frameH << " " << frameW << endl;

CvSize size = cvSize(frameH, frameW);

Mat frame;

Mat input;

Mat output;

GpuMat d_frame;

GpuMat d_output;

for(;;)

{

  cap >> frame;

  if (frame.empty())

     break;

  //convert to grayscale

  cvtColor(frame, input, CV_BGR2GRAY);

  // memory Copy from Host to Device

  d_frame.upload(input);

  // Call CUDA kernel

  d_output.create(size, CV_8UC1);

  callKernel(d_frame, d_output);

  // memory Copy from Device to Host

  d_output.download(output);

  imshow("output", output);

  if(waitKey(30) >= 0)

     break;

}

return 0;

}

//funcam_cuda.cu

__global__ void funcKernel(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep)

{

int rowInd = blockIdx.y * blockDim.y + threadIdx.y;

int colInd = blockIdx.x * blockDim.x + threadIdx.x;

uchar* rowsrcPtr = srcptr + rowInd*step;

uchar* rowdstPtr = dstptr + rowInd*dststep;

uchar pixVal = rowsrcPtr[colInd];

// rowdstPtr[colInd] = (pixVal > 60 ? 255 : 0);

rowdstPtr[colInd] = 0;

}

extern "C"

void func(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep) {

dim3 grDim (16, 12);

dim3 blDim (40, 40);

funcKernel<<< grDim, blDim >>>(srcptr, dstptr, step, cols, rows, dststep);

cudaThreadSynchronize(); //Is this reqd?

}

click to hide/show revision 2
improved style

GPU Code Not Working Question

Hi,

(This is a continuation from the post "Pseudocode for custom GPU computation")computation")

Following is my GPU opencv code. I tried 2 things - 1) to just set all pixels of the output to zero, 2) return a monochrome image based on a threshold on the pixel values of the input image. But for both cases when I run it, I dont get the result I want, I get some grayscale image, which is just peppered noise. My code below:

 //The size of the video frame is 480 * 640

//funcam.cpp

640 //funcam.cpp using namespace std;

std; using namespace cv;

cv; using namespace cv::gpu;

cv::gpu; void callKernel(const GpuMat& src, const GpuMat& dst)

{

dst) { uchar* p = src.data;

src.data; uchar* p2 = dst.data;

dst.data; func(p, p2, src.step, src.cols, src.rows, dst.step);

}

dst.step); } int main(int, char**) {

{ VideoCapture cap(0);

cap(0); if(!cap.isOpened()) return -1;

-1; int frameH = (int) cap.get(CV_CAP_PROP_FRAME_HEIGHT);

cap.get(CV_CAP_PROP_FRAME_HEIGHT); int frameW = (int) cap.get(CV_CAP_PROP_FRAME_WIDTH);

cap.get(CV_CAP_PROP_FRAME_WIDTH); cout << frameH << " " << frameW << endl;

endl; CvSize size = cvSize(frameH, frameW);

frameW); Mat frame;

frame; Mat input;

input; Mat output;

output; GpuMat d_frame;

d_frame; GpuMat d_output;

for(;;)

{

d_output;
for(;;)
{
 cap >> frame;
 if (frame.empty())
 break;
  //convert to grayscale
 cvtColor(frame, input, CV_BGR2GRAY);
  // memory Copy from Host to Device
 d_frame.upload(input);
  // Call CUDA kernel
 d_output.create(size, CV_8UC1);
 callKernel(d_frame, d_output);
  // memory Copy from Device to Host
 d_output.download(output);
  imshow("output", output);
  if(waitKey(30) >= 0)
 break;

}

 }
return 0;

}

//funcam_cuda.cu

} //funcam_cuda.cu __global__ void funcKernel(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep)

{

{ int rowInd = blockIdx.y * blockDim.y + threadIdx.y;

threadIdx.y; int colInd = blockIdx.x * blockDim.x + threadIdx.x;

threadIdx.x; uchar* rowsrcPtr = srcptr + rowInd*step;

rowInd*step; uchar* rowdstPtr = dstptr + rowInd*dststep;

rowInd*dststep; uchar pixVal = rowsrcPtr[colInd];

rowsrcPtr[colInd]; // rowdstPtr[colInd] = (pixVal > 60 ? 255 : 0);

0); rowdstPtr[colInd] = 0;

}

0; } extern "C"

"C" void func(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep) {

{ dim3 grDim (16, 12);

12); dim3 blDim (40, 40);

40); funcKernel<<< grDim, blDim >>>(srcptr, dstptr, step, cols, rows, dststep);

dststep); cudaThreadSynchronize(); //Is this reqd?

reqd? }

click to hide/show revision 3
formatting

GPU Code Not Working Question

Hi,

(This is a continuation from the post "Pseudocode for custom GPU computation")

Following is my GPU opencv code. I tried 2 things - 1) to just set all pixels of the output to zero, 2) return a monochrome image based on a threshold on the pixel values of the input image. But for both cases when I run it, I dont get the result I want, I get some grayscale image, which is just peppered noise. My code below:


//The size of the video frame is 480 * 640
 //funcam.cpp

using namespace std;
 using namespace cv;
 using namespace cv::gpu;
 
void callKernel(const GpuMat& src, const GpuMat& dst)
 {
    uchar* p = src.data;
    uchar* p2 = dst.data;
    func(p, p2, src.step, src.cols, src.rows, dst.step);
 }
 
int main(int, char**) 
{
    VideoCapture cap(0);
    if(!cap.isOpened()) return -1;

    int frameH    = (int) cap.get(CV_CAP_PROP_FRAME_HEIGHT);
    int frameW    = (int) cap.get(CV_CAP_PROP_FRAME_WIDTH);
    cout << frameH << " " << frameW << endl;

   CvSize size = cvSize(frameH, frameW);
    Mat frame;
    Mat input;
    Mat output;
    GpuMat d_frame;
    GpuMat d_output;

   for(;;)
    {
       cap >> frame;
       if (frame.empty())
          break;

      //convert to grayscale
       cvtColor(frame, input, CV_BGR2GRAY);

      // memory Copy from Host to Device
       d_frame.upload(input);

      // Call CUDA kernel
       d_output.create(size, CV_8UC1);
       callKernel(d_frame, d_output);

      // memory Copy from Device to Host
       d_output.download(output);

      imshow("output", output);
       if(waitKey(30) >= 0)
          break;
   }
     return 0;
 }

 //funcam_cuda.cu
 __global__ void funcKernel(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep) 
 {
    int rowInd = blockIdx.y * blockDim.y + threadIdx.y;
    int colInd = blockIdx.x * blockDim.x + threadIdx.x;
    uchar* rowsrcPtr = srcptr + rowInd*step;
    uchar* rowdstPtr = dstptr + rowInd*dststep;
    uchar pixVal = rowsrcPtr[colInd];
 // rowdstPtr[colInd] = (pixVal > 60 ? 255 : 0);
    rowdstPtr[colInd] = 0;
 }

extern "C"
 void func(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep) dststep)
{
 dim3 grDim (16, 12);
 dim3 blDim (40, 40);
 funcKernel<<< grDim, blDim >>>(srcptr, dstptr, step, cols, rows, dststep);
 cudaThreadSynchronize(); //Is this reqd?

}}
click to hide/show revision 4
retagged

GPU Code Not Working Question

Hi,

(This is a continuation from the post "Pseudocode for custom GPU computation")

Following is my GPU opencv code. I tried 2 things - 1) to just set all pixels of the output to zero, 2) return a monochrome image based on a threshold on the pixel values of the input image. But for both cases when I run it, I dont get the result I want, I get some grayscale image, which is just peppered noise. My code below:


//The size of the video frame is 480 * 640
//funcam.cpp

using namespace std;
using namespace cv;
using namespace cv::gpu;

void callKernel(const GpuMat& src, const GpuMat& dst)
{
   uchar* p = src.data;
   uchar* p2 = dst.data;
   func(p, p2, src.step, src.cols, src.rows, dst.step);
}

int main(int, char**) 
{
   VideoCapture cap(0);
   if(!cap.isOpened()) return -1;

   int frameH    = (int) cap.get(CV_CAP_PROP_FRAME_HEIGHT);
   int frameW    = (int) cap.get(CV_CAP_PROP_FRAME_WIDTH);
   cout << frameH << " " << frameW << endl;

   CvSize size = cvSize(frameH, frameW);
   Mat frame;
   Mat input;
   Mat output;
   GpuMat d_frame;
   GpuMat d_output;

   for(;;)
   {
      cap >> frame;
      if (frame.empty())
         break;

      //convert to grayscale
      cvtColor(frame, input, CV_BGR2GRAY);

      // memory Copy from Host to Device
      d_frame.upload(input);

      // Call CUDA kernel
      d_output.create(size, CV_8UC1);
      callKernel(d_frame, d_output);

      // memory Copy from Device to Host
      d_output.download(output);

      imshow("output", output);
      if(waitKey(30) >= 0)
         break;
   }
    return 0;
}

//funcam_cuda.cu
__global__ void funcKernel(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep) 
{
   int rowInd = blockIdx.y * blockDim.y + threadIdx.y;
   int colInd = blockIdx.x * blockDim.x + threadIdx.x;
   uchar* rowsrcPtr = srcptr + rowInd*step;
   uchar* rowdstPtr = dstptr + rowInd*dststep;
   uchar pixVal = rowsrcPtr[colInd];
// rowdstPtr[colInd] = (pixVal > 60 ? 255 : 0);
   rowdstPtr[colInd] = 0;
}

extern "C"
void func(uchar* srcptr, uchar* dstptr, int step, int cols, int rows, int dststep)
{
dim3 grDim (16, 12);
dim3 blDim (40, 40);
funcKernel<<< grDim, blDim >>>(srcptr, dstptr, step, cols, rows, dststep);
cudaThreadSynchronize(); //Is this reqd?
}