I struggled with this for a while, so I am posting it here in case it helps someone else.
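The functor and the two helper functions below create Thrust iterators that correctly index a cv::cuda::GpuMat, i.e. they take the row stride (`step`) into account instead of assuming that the rows are stored back to back.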
/**
 * Index-mapping functor: converts a flat, row-major element index into the
 * offset of that element inside a buffer whose rows are `step` elements apart.
 *
 * For flat index x the logical position is (x / columns, x % columns) and
 * the returned offset is row * step + column. Both members are counted in
 * elements, not bytes (the callers in this file pass mat.step / sizeof(T)).
 */
struct step_functor : public thrust::unary_function<int, int>
{
    int columns;  // number of elements in one logical row
    int step;     // distance between the starts of two consecutive rows, in elements

    step_functor(int columns_, int step_)
        : columns(columns_), step(step_)
    {
    }

    // Callable from host and device code so Thrust can evaluate it inside kernels.
    __host__ __device__
    int operator()(int x) const
    {
        return (x / columns) * step + (x % columns);
    }
};
/**
 * Builds a Thrust iterator positioned at the first element of `mat`.
 *
 * The iterator presents the matrix as a flat, row-major sequence: a counting
 * iterator supplies flat indices starting at 0, step_functor turns each one
 * into a stride-aware element offset, and the permutation iterator applies
 * that offset to the device pointer of row 0.
 *
 * @tparam T  element type used to read the matrix. No check is made that it
 *            matches the actual type of `mat`; the indexing treats every
 *            column as exactly one T.
 * @param mat matrix (or row/column view) to iterate over. Only a raw device
 *            pointer is kept, so the underlying buffer must outlive the
 *            returned iterator.
 * @return iterator referring to element (0, 0) of `mat`.
 */
template<typename T>
thrust::permutation_iterator<thrust::device_ptr<T>, thrust::transform_iterator<step_functor, thrust::counting_iterator<int>>> GpuMatBeginItr(cv::cuda::GpuMat mat)
{
    // GpuMat::step is in bytes; the functor works in elements of T.
    const int elemsPerRow = static_cast<int>(mat.step / sizeof(T));

    thrust::device_ptr<T> base = thrust::device_pointer_cast(mat.ptr<T>(0));
    auto offsets = thrust::make_transform_iterator(
        thrust::make_counting_iterator(0),
        step_functor(mat.cols, elemsPerRow));

    return thrust::make_permutation_iterator(base, offsets);
}
/**
 * Builds the past-the-end Thrust iterator for `mat`, i.e. the counterpart of
 * GpuMatBeginItr<T>(mat).
 *
 * The matrix is presented as a flat, row-major sequence of rows * cols
 * elements; step_functor converts each flat index into a stride-aware
 * element offset from the device pointer of row 0.
 *
 * @tparam T  element type used to read the matrix. No check is made that it
 *            matches the actual type of `mat`; the indexing treats every
 *            column as exactly one T.
 * @param mat matrix (or row/column view) to iterate over. Only a raw device
 *            pointer is kept, so the underlying buffer must outlive the
 *            returned iterator.
 * @return iterator one past the last element of `mat`.
 */
template<typename T>
thrust::permutation_iterator<thrust::device_ptr<T>, thrust::transform_iterator<step_functor, thrust::counting_iterator<int>>> GpuMatEndItr(cv::cuda::GpuMat mat)
{
    // step_functor consumes a flat element index (row = x / columns), so the
    // end position is the total element count. Using mat.rows alone is only
    // correct for single-column matrices and truncates the range otherwise.
    const int elementCount = mat.rows * mat.cols;

    // GpuMat::step is in bytes; the functor works in elements of T.
    const int elemsPerRow = static_cast<int>(mat.step / sizeof(T));

    return thrust::make_permutation_iterator(
        thrust::device_pointer_cast(mat.ptr<T>(0)),
        thrust::make_transform_iterator(
            thrust::make_counting_iterator(elementCount),
            step_functor(mat.cols, elemsPerRow)));
}
With these helpers, running a Thrust algorithm on a single row or column of a GpuMat is as easy as:
// Example: sort the values in column 5 by the keys in column 4, on the GPU.
// h_test is defined outside this snippet (presumably a host-side cv::Mat — confirm).
cv::cuda::GpuMat d_test(h_test);
// Begin/end iterators over column 4, read as int; these are the sort keys.
// NOTE(review): assumes d_test holds single-channel 32-bit int elements — confirm.
auto keyBegin = GpuMatBeginItr<int>(d_test.col(4));
auto keyEnd = GpuMatEndItr<int>(d_test.col(4));
// Begin iterator over column 5; these values are reordered along with the keys.
auto valueBegin = GpuMatBeginItr<int>(d_test.col(5));
// Sorts the keys in place and applies the same permutation to the values.
thrust::sort_by_key(keyBegin, keyEnd, valueBegin);