Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

Hello!

Your main error is:

CvSize size = cvSize(frameH, frameW);

The signature of cvSize() is cvSize(int width, int height), so the first parameter must be the width:

CvSize size = cvSize(frameW, frameH);

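Also, dim3 blDim(40, 40); is not a good block size: 40 x 40 = 1600 threads exceeds the maximum number of threads per block (1024 on current GPUs), so the kernel launch fails. Good sizes are 16x16 or 32x8. cudaThreadSynchronize is deprecated; use cudaDeviceSynchronize instead.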

// .cu file

#include <cstdio>
#include <cuda_runtime.h>

// Binary threshold of an 8-bit image, one thread per byte.
//
// Each thread derives its (x, y) position from the 2D block/thread indices,
// reads the source byte at that position and writes 255 to the destination
// when the value is greater than 60, otherwise 0.
//
// srcptr, dstptr   base addresses of the source and destination buffers;
//                  they are dereferenced on the device
// srcstep, dststep distance in bytes between the starts of consecutive rows
// cols, rows       extent of the region to process; threads that fall
//                  outside it do nothing
__global__ void funcKernel(const unsigned char* srcptr, unsigned char* dstptr,
                           size_t srcstep, size_t dststep, int cols, int rows)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so the trailing threads may lie
    // past the right/bottom edge and must not touch memory.
    if (y < rows && x < cols)
    {
        // Rows are addressed through their step, not through cols, so padded
        // rows are handled correctly.
        const unsigned char* srcRow = srcptr + y * srcstep;
        unsigned char* dstRow = dstptr + y * dststep;

        if (srcRow[x] > 60)
            dstRow[x] = 255;
        else
            dstRow[x] = 0;
    }
}

// Integer division of a by b rounded up, for non-negative a and positive b.
// Used to compute how many blocks of size b are needed to cover a elements.
int divUp(int a, int b)
{
    // Adding (b - 1) before the truncating division bumps any non-zero
    // remainder up to the next whole quotient.
    const int padded = a + b - 1;
    return padded / b;
}

// Host-side launcher for funcKernel.
//
// Covers a cols x rows region with 32x8 thread blocks, launches the kernel on
// the default stream and blocks until the device has finished.
//
// srcptr, dstptr   source and destination buffers handed to the kernel; they
//                  must be addresses the device can dereference
// srcstep, dststep distance in bytes between consecutive rows of each buffer
// cols, rows       extent of the region to process; nothing is launched when
//                  either one is not positive
//
// The signature returns void, so failures are reported on stderr rather than
// being silently dropped.
void func(const unsigned char* srcptr, unsigned char* dstptr,
          size_t srcstep, size_t dststep, int cols, int rows)
{
    // An empty region would produce a zero-sized grid, which is an invalid
    // launch configuration; there is nothing to do in that case anyway.
    if (cols <= 0 || rows <= 0)
        return;

    const dim3 blDim(32, 8);
    const dim3 grDim(divUp(cols, blDim.x), divUp(rows, blDim.y));

    funcKernel<<<grDim, blDim>>>(srcptr, dstptr, srcstep, dststep, cols, rows);

    // A kernel launch returns no status itself; configuration errors only
    // become visible through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        std::fprintf(stderr, "func: kernel launch failed: %s\n", cudaGetErrorString(err));
        return;
    }

    // Errors raised while the kernel runs (e.g. an illegal address) are
    // reported by the next synchronizing call.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess)
        std::fprintf(stderr, "func: kernel execution failed: %s\n", cudaGetErrorString(err));
}