Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

Custom Kernel with GpuMat No Effect

Hi so I'm a beginner trying to write a pretty simple kernel. I just want to take the arc cosine of the pixels of an image and then return those. I'm using GpuMat objects but I cannot get the kernel to do anything.

//.cpp code
void gpu_acos(const gpu::GpuMat &src, gpu::GpuMat &dst){
    float* srcptr = (float *)src.data;
    float* dstptr = (float *)dst.data;
    acos_func(srcptr,dstptr, src.step,dst.step, src.cols,src.rows);
    return;

}

bool test_acos(){
    Mat input = imread("corgi.jpg",0);
    int rows = input.rows;
    int cols = input.cols;
    Size in_size(rows,cols);
    gpu::GpuMat src, dst;
    src.upload(input);
    dst.create(in_size,CV_32FC1);
    gpu_acos(src,dst);
    cout << "baack from gpu call\n";

    Mat test_out;
    dst.download(test_out);
    cout << "this is the test: " << test_out << endl;
    return true;

}

 //.cu code
#include "custom_kernels.h"
#include <iostream>
using namespace std;
using namespace cv; 
__global__ void acosKernel(const float* srcptr, float* dstptr, size_t srcstep, size_t dststep, int cols, int rows){
    int rowInd = blockIdx.y*blockDim.y+threadIdx.y;
    int colInd = blockIdx.x*blockDim.x+threadIdx.x;
    if(rowInd >= rows || colInd >= cols)
            return;
    const float* rowsrcPtr = srcptr+rowInd*srcstep;
    float* rowdstPtr = dstptr+rowInd*dststep;

    float pixVal = rowsrcPtr[colInd];

    if( ((int) pixVal % 90)==0)
            rowdstPtr[colInd]=0.0;
    else
            rowdstPtr[colInd] = acos(pixVal);

}

int divUp(int a, int b){ 
    return (a+b-1)/b;

}

void acos_func(const float* srcptr, float* dstptr, size_t srcstep, size_t dststep, int cols, int rows){
    dim3 blDim(32,8);
    dim3 grDim(divUp(cols,blDim.x),divUp(rows,blDim.y));

    acosKernel<<<grDim, blDim>>>(srcptr,dstptr,srcstep,dststep,cols,rows);
    cudaDeviceSynchronize();

} ~

Even when I've replaced the acos logic with something simple (for example all 0's) I get as output the input image. I'm sure I'm missing something obvious, but any help would be greatly appreciated. Thanks in advance!