I have a class called UnifiedMemoryMat which uses cudaMallocManaged
to share memory between a cv::Mat
and cv::cuda::GpuMat
. These two are accessible from the class by the user with m_image_cpu
and m_image_gpu
. My application is capturing images and then processing them in the shared memory UnifiedMemoryMat. I use cv::cuda::resize
with umm.m_image_gpu
. When I proceed to my next capture, I am not able to upload the new image to my shared memory and the old image stays there. I verify that the image is uploaded properly with if(m_image_gpu.data != m_um_ptr)
where m_um_ptr
is a void *
that I pass to cudaMallocManaged()
. This behavior is extra weird because it works properly when I use cv::resize
with umm.m_image_cpu
. Even weirder, I can get cv::cuda::resize
to work properly if earlier in the program I have cv::cuda::GpuMat image_gpu = umm.m_image_gpu
. This doesn't cause an upload issue in my UnifiedMemoryMat. The only difference between UnifiedMemoryMat umm
and cv::cuda::GpuMat image_gpu
is the scope they are declared in. umm
is declared in an outer scope so that I don't have to reallocate the memory each while loop iteration. image_gpu
is in the scope of each image capture, but they refer to the same image so I'm not sure why this issue would happen.
My UnifiedMemoryMat is defined as follows (this is simplified a bit): header:
/// Exposes one buffer through two OpenCV headers: a host-side cv::Mat and a
/// device-side cv::cuda::GpuMat that are both constructed over the same
/// pointer (m_um_ptr).
///
/// NOTE(review): no destructor is declared here, so nothing visible in this
/// class releases the buffer — confirm whether the full (unsimplified) class
/// frees it.
class UnifiedMemoryMat {
    /// Base address of the shared buffer. Starts as nullptr so that comparing
    /// a header's data pointer against it is well defined before any
    /// allocation has happened.
    void *m_um_ptr = nullptr;

public:
    /// Host-side header over the shared buffer.
    cv::Mat m_image_cpu;
    /// Device-side header over the shared buffer.
    cv::cuda::GpuMat m_image_gpu;

    /// Allocates the shared buffer for images shaped like `img` and points
    /// both headers at it. Returns 0 on success, -1 on failure.
    int allocateMemory(cv::Mat img);

    /// Uploads `img` through m_image_gpu. Returns 0 if m_image_gpu still
    /// points at the shared buffer afterwards, -1 otherwise.
    int uploadImage(cv::Mat img);
};
definition:
/// Allocates a CUDA managed buffer large enough for an image shaped like
/// `img` and wraps it with both the CPU and the GPU header.
///
/// @param img  Image whose size and type define the buffer geometry; its
///             pixel data is not copied here.
/// @return 0 on success, -1 if cudaMallocManaged fails (the headers are left
///         untouched in that case).
///
/// NOTE(review): a buffer obtained by an earlier call is not released here.
int UnifiedMemoryMat::allocateMemory(cv::Mat img) {
    // elemSize() is channels * bytes-per-channel, so this is correct for every
    // depth (rows * cols * channels() is only right for 8-bit images).
    const size_t image_byte_size = img.total() * img.elemSize();

    // Allocate into the member pointer that the headers below are built on.
    if (cudaMallocManaged(&m_um_ptr, image_byte_size) != cudaSuccess) {
        return -1;
    }

    // Both headers reference the same managed allocation; neither owns it.
    m_image_cpu = cv::Mat(img.size(), img.type(), m_um_ptr);
    m_image_gpu = cv::cuda::GpuMat(img.size(), img.type(), m_um_ptr);
    return 0;
}
/// Copies `img` into the managed buffer through the GpuMat header.
///
/// GpuMat::upload() re-creates its destination whenever the header's size or
/// type differs from the source. If m_image_gpu was previously used as the
/// output of an operation that changed its geometry (for example an in-place
/// cv::cuda::resize), the header no longer refers to m_um_ptr and every
/// subsequent upload would land in a separate device allocation. To recover,
/// the header is re-pointed at m_um_ptr before uploading — but only when
/// m_image_cpu still wraps m_um_ptr with exactly the size and type of `img`,
/// which shows the buffer is large enough for `img`.
///
/// @param img  Host image to upload.
/// @return 0 if m_image_gpu points at the managed buffer after the upload,
///         -1 otherwise.
int UnifiedMemoryMat::uploadImage(cv::Mat img) {
    const bool gpu_view_intact = m_image_gpu.data == m_um_ptr &&
                                 m_image_gpu.rows == img.rows &&
                                 m_image_gpu.cols == img.cols &&
                                 m_image_gpu.type() == img.type();

    if (!gpu_view_intact) {
        // The CPU header is the only record of the buffer geometry available
        // here; trust it only while it still wraps the managed pointer.
        const bool cpu_view_matches_img = m_image_cpu.data != nullptr &&
                                          m_image_cpu.data == m_um_ptr &&
                                          m_image_cpu.rows == img.rows &&
                                          m_image_cpu.cols == img.cols &&
                                          m_image_cpu.type() == img.type();
        if (cpu_view_matches_img) {
            m_image_gpu = cv::cuda::GpuMat(img.size(), img.type(), m_um_ptr);
        }
        // Otherwise fall through: upload() will allocate its own memory and
        // the check below reports the mismatch, as before.
    }

    m_image_gpu.upload(img);

    if (m_image_gpu.data != m_um_ptr) {
        return -1;
    }
    return 0;
}
pseudocode of application:
// Declared outside the capture loop so the managed buffers are allocated once.
UnifiedMemoryMat um_images[4];
//initialization
//allocate memory for each um_images using UnifiedMemoryMat::allocateMemory();
while(capturing) {
    // Per-iteration headers; assigning a Mat/GpuMat copies the header only,
    // so these refer to the same pixels as the members they are assigned from.
    cv::Mat images[4];
    cv::cuda::GpuMat images_gpu[4];
    //capture images
    for(int i = 0; i < 4; ++i) {
        um_images[i].uploadImage(camera_images); //problem occurs here after the first iteration when using cv::cuda::resize with um_images[i].m_image_gpu
        images[i] = um_images[i].m_image_cpu;
        images_gpu[i] = um_images[i].m_image_gpu;
    }
    //processing
    for(int i = 0; i < 4; ++i) {
        cv::resize(um_images[i].m_image_cpu, um_images[i].m_image_cpu, size); //works
        cv::resize(images[i], images[i], size); //works
        // Resizing into the local header re-creates only images_gpu[i];
        // um_images[i].m_image_gpu keeps pointing at the managed buffer.
        cv::cuda::resize(images_gpu[i], images_gpu[i], size); //works
        // Resizing into the member re-creates m_image_gpu at the new size, so
        // it stops referring to the managed buffer; the next uploadImage() then
        // sees m_image_gpu.data != m_um_ptr.
        cv::cuda::resize(um_images[i].m_image_gpu, um_images[i].m_image_gpu, size); //does not work
    }
}