Hello!
I have a problem processing a set of images using OpenCV and CUDA Unified Memory.
I am trying to reuse a single Mat that is created once on top of a data buffer pre-allocated with cudaMallocManaged.
When reading the image, the data seems to be reallocated during the Mat::create call in the imread function, but create should not reallocate since my pre-allocated buffer has exactly the same size.
Here's an example of my issue:
#include <iostream>
#include <experimental/filesystem>
#include <cuda_runtime.h>
#include <opencv2/opencv.hpp>
#include <opencv2/cudawarping.hpp>
// Image width in pixels; passed as the column count of every Mat/GpuMat header
// and used to size the managed buffers.
#define COLS 5344
// Image height in pixels; passed as the row count of every Mat/GpuMat header
// and used to size the managed buffers.
#define ROWS 4016
// Short alias for the experimental filesystem library used to iterate the input folder.
namespace fs = std::experimental::filesystem;
int main(int argc, char const *argv[])
{
int index = 0;
//---- get arguments ----
if (argc < 2) {
std::cout << "number of arguments invalid" << std::endl;
std::cout << "1 - Input Folder" << std::endl;
std::cout << "2 - Output Folder" << std::endl;
exit(1);
}
std::string input_folder = argv[1];
std::string output_folder = argv[2];
// Unified pointers
void *src_ptr, *dst_ptr;
if (cudaSuccess != cudaMallocManaged(&src_ptr, ROWS*COLS*3)) return -1;
cv::Mat src(ROWS, COLS, CV_8UC3, src_ptr);
cv::cuda::GpuMat d_src(ROWS, COLS, CV_8UC3, src_ptr);
if (cudaSuccess != cudaMallocManaged(&dst_ptr, ROWS*COLS*3)) return -2;
cv::Mat dst(rows, cols, CV_8UC3, dst_ptr);
cv::cuda::GpuMat d_dst(rows, cols, CV_8UC3, dst_ptr);
for (const auto& entry : fs::directory_iterator(input_folder))
{
std::cout << "Before imread : src total = " << src.total() << " src elemsize = " << src.elemSize() << " src_ptr : " << src_ptr << " &(src 0 0)" << &src.at<cv::Vec3b>(0, 0) << std::endl;
// Reading image
src = cv::imread(entry.path());
std::cout << "After imread : src total = " << src.total() << " src elemsize = " << src.elemSize() << " src_ptr : " << src_ptr << " &(src 0 0)" << &src.at<cv::Vec3b>(0, 0) << std::endl;
// Some action on cuda device
cv::cuda::rotate(d_src, d_dst, d_dst.size(), 180.0);
// write output file
std::string out_name = output_folder + "preprocessed_image" + std::to_string(index) + ".jpg";
index++;
cv::imwrite(out_name, dst);
break; // only the first one is needed for the example
}
return 0;
}
With this example, when the imread is performed (and therefore a Mat::create), the data pointer is reallocated on the first loop iteration. This seems weird, since Mat::create should check Mat::total * Mat::elemSize and not reallocate if the result is the same as for the image being read, which is the case here. I'm printing the address held in src_ptr and the data address of the src Mat: before the imread they are identical; after the imread, the second one has changed.
My goal is to avoid explicit data transfers between the host and the CUDA device. I really don't know why the data pointer is reallocated; I'm looking forward to any advice on how to correct this issue or avoid it. Thank you in advance.