I have a small kernel that goes through each element in a binary image of type CV_32S assigning 0 to it, if its value is 255, otherwise the thread's unique ID, so I need a sequential 1D index into the GpuMat's data (label_connected_components is supposed to do more, but this is just the first step that goes wrong so far). I am using OpenCV 2.4.9 installed using Homebrew. Here is a MWE:
#include "gpu/Utils.hpp"
#include "cpu/Utils.hpp"
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <iostream>
#include <stdexcept>
int main() {
try {
cv::Mat h_image = cv::imread("../images/t82_test_image.bmp", CV_LOAD_IMAGE_GRAYSCALE);
if (h_image.empty()) {
throw std::runtime_error("Unable to find image");
}
// Convert image contents to 32-bit signed integers
h_image.convertTo(h_image, CV_32S);
// Upload image to the device
cv::gpu::GpuMat d_image;
d_image.upload(h_image);
// Label connected components
utils::gpu::label_connected_components(d_image, 8, true);
// Download image from the device
h_image.setTo(0);
d_image.download(h_image);
// Display the labelled image
cv::Mat h_color_image(h_image.size(), CV_8UC4);
utils::draw_connected_components(h_image, h_color_image);
cv::imshow("Connected components", h_color_image);
cv::waitKey(0);
} catch (std::runtime_error& ex) {
std::cerr << "Error: " << ex.what() << std::endl;
return 1;
} catch (cv::Exception& cvex) {
std::cerr << "OpenCV error: " << cvex.what() << std::endl;
return 1;
}
return 0;
}
And here's the kernel:
// Calling the kernel:
// const dim3 threads(1024, 1, 1);
// dim3 blocks = (d_image.size().area() + threads.x + 1) / threads.x;
// initialize_labels<<<blocks, threads>>>(d_image);
__global__ void initialize_labels(cv::gpu::PtrStepSz<int> d_in) {
unsigned int gid = threadIdx.x + blockIdx.x * blockDim.x;
if (gid >= d_in.cols * d_in.rows) {
return;
}
d_in.data[gid] = (d_in.data[gid] == 0 ? gid + 1 : 0);
}
Finally, here's the artefacts I'm talking about:
The multi-colored blob is fine (unique label -> unique color), but the light blue area is not...My internet research has led me to believe I am messing up the access by not accounted for the GpuMat
's step size (in bytes), but I cannot firgure out how to use it with integers. I have another kernel where access is done using operator(y, x)
and that seems to work fine, but it's hard to tell when the first step does not work...What am I doing wrong?