Hi so I'm a beginner trying to write a pretty simple kernel. I just want to take the arc cosine of the pixels of an image and then return those. I'm using GpuMat objects but I cannot get the kernel to do anything.
//.cpp code
void gpu_acos(const gpu::GpuMat &src, gpu::GpuMat &dst){
float* srcptr = (float *)src.data;
float* dstptr = (float *)dst.data;
acos_func(srcptr,dstptr, src.step,dst.step, src.cols,src.rows);
return;
}
bool test_acos(){
Mat input = imread("corgi.jpg",0);
int rows = input.rows;
int cols = input.cols;
Size in_size(rows,cols);
gpu::GpuMat src, dst;
src.upload(input);
dst.create(in_size,CV_32FC1);
gpu_acos(src,dst);
cout << "baack from gpu call\n";
Mat test_out;
dst.download(test_out);
cout << "this is the test: " << test_out << endl;
return true;
}
//.cu code
#include "custom_kernels.h"
#include <iostream>
using namespace std;
using namespace cv;
__global__ void acosKernel(const float* srcptr, float* dstptr, size_t srcstep, size_t dststep, int cols, int rows){
int rowInd = blockIdx.y*blockDim.y+threadIdx.y;
int colInd = blockIdx.x*blockDim.x+threadIdx.x;
if(rowInd >= rows || colInd >= cols)
return;
const float* rowsrcPtr = srcptr+rowInd*srcstep;
float* rowdstPtr = dstptr+rowInd*dststep;
float pixVal = rowsrcPtr[colInd];
if( ((int) pixVal % 90)==0)
rowdstPtr[colInd]=0.0;
else
rowdstPtr[colInd] = acos(pixVal);
}
int divUp(int a, int b){
return (a+b-1)/b;
}
void acos_func(const float* srcptr, float* dstptr, size_t srcstep, size_t dststep, int cols, int rows){
dim3 blDim(32,8);
dim3 grDim(divUp(cols,blDim.x),divUp(rows,blDim.y));
acosKernel<<<grDim, blDim>>>(srcptr,dstptr,srcstep,dststep,cols,rows);
cudaDeviceSynchronize();
} ~
Even when I've replaced the acos logic with something simple (for example all 0's) I get as output the input image. I'm sure I'm missing something obvious, but any help would be greatly appreciated. Thanks in advance!