Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

cv::cuda::resize not working properly with unified memory

I have a class called UnifiedMemoryMat which uses cudaMallocManaged to share memory between a cv::Mat and a cv::cuda::GpuMat. These two are accessible from the class by the user as m_image_cpu and m_image_gpu. My application captures images and then processes them in the shared memory of a UnifiedMemoryMat. I use cv::cuda::resize with umm.m_image_gpu. When I proceed to my next capture, I am not able to upload the new image to my shared memory and the old image stays there. I verify that the image is uploaded properly with if(m_image_gpu.data != m_um_ptr), where m_um_ptr is the void * that I pass to cudaMallocManaged(). This behavior is extra weird because it works properly when I use cv::resize with umm.m_image_cpu. Even weirder, I can get cv::cuda::resize to work properly if earlier in the program I have cv::cuda::GpuMat image_gpu = umm.m_image_gpu. This doesn't cause an upload issue in my UnifiedMemoryMat. The only difference between UnifiedMemoryMat umm and cv::cuda::GpuMat image_gpu is the scope they are declared in. umm is declared in an outer scope so that I don't have to reallocate the memory on each while loop iteration. image_gpu is in the scope of each image capture, but they refer to the same image, so I'm not sure why this issue would happen.

My UnifiedMemoryMat is defined as follows (this is simplified a bit): header:

// Holds a cv::Mat and a cv::cuda::GpuMat that allocateMemory() constructs
// over the same buffer pointer, plus the helpers that set them up and
// refresh them with a new image.
class UnifiedMemoryMat {
private:
    // Buffer pointer that uploadImage() compares against m_image_gpu.data.
    void *m_um_ptr;

public:
    // Host-side header over the buffer.
    cv::Mat m_image_cpu;
    // Device-side header over the buffer.
    cv::cuda::GpuMat m_image_gpu;

    // Allocates the buffer for an image shaped like img; 0 on success, -1 on failure.
    int allocateMemory(cv::Mat img);
    // Uploads img into m_image_gpu; 0 if m_image_gpu still uses the buffer, -1 otherwise.
    int uploadImage(cv::Mat img);
};

definition:

// Allocates a CUDA managed buffer large enough for an image shaped like img
// and points both m_image_cpu and m_image_gpu at that buffer.
//
// img: only its size and type are read; its pixel data is not copied here.
// Returns 0 on success, or -1 if cudaMallocManaged reports an error.
//
// NOTE(review): a buffer obtained by an earlier call is not released here --
// confirm this is only called once per object or free the old buffer first.
int UnifiedMemoryMat::allocateMemory(cv::Mat img) {
    // total() * elemSize() covers every channel at its real depth, so the
    // buffer is also large enough for 16-bit or floating-point images
    // (rows * cols * channels() is only correct for 8-bit data).
    const size_t image_byte_size = img.total() * img.elemSize();

    // Allocate into the member pointer so the headers below and the check in
    // uploadImage() all refer to the same address.
    if(cudaMallocManaged(&m_um_ptr, image_byte_size) != cudaSuccess) {
        return -1;
    }

    // Both headers wrap the caller-provided pointer; neither owns the memory.
    m_image_cpu = cv::Mat(img.size(), img.type(), m_um_ptr);
    m_image_gpu = cv::cuda::GpuMat(img.size(), img.type(), m_um_ptr);
    return 0;
}

// Uploads img into m_image_gpu, then reports whether m_image_gpu still points
// at the buffer held in m_um_ptr.
//
// Returns 0 when m_image_gpu.data equals m_um_ptr after the upload, -1 otherwise.
int UnifiedMemoryMat::uploadImage(cv::Mat img) {
    m_image_gpu.upload(img);

    // NOTE(review): presumably the pointers differ when m_image_gpu was
    // reallocated (by upload() itself or by an earlier call that resized it
    // in place) -- confirm against the OpenCV GpuMat documentation.
    const bool uses_shared_buffer = (m_image_gpu.data == m_um_ptr);
    return uses_shared_buffer ? 0 : -1;
}

pseudocode of application:

// Sketch of the capture/processing loop. The four resize calls in the second
// loop are the variants being compared; the trailing comments record which
// ones behave as expected.
UnifiedMemoryMat um_images[4];

//initialization
//allocate memory for each um_images using UnifiedMemoryMat::allocateMemory();

while(capturing) {
    cv::Mat images[4];
    cv::cuda::GpuMat images_gpu[4];

    //capture images

    for(int i = 0; i < 4; ++i) {
        um_images[i].uploadImage(camera_images); //problem occurs here after the first iteration when using cv::cuda::resize with um_images[i].m_image_gpu
        // Per-iteration headers referring to the same images as um_images[i].
        images[i] = um_images[i].m_image_cpu;
        images_gpu[i] = um_images[i].m_image_gpu;
    }

    //processing

    for(int i = 0; i < 4; ++i) {
        cv::resize(um_images[i].m_image_cpu, um_images[i].m_image_cpu, size); //works
        cv::resize(images[i], images[i], size); //works
        cv::cuda::resize(images_gpu[i], images_gpu[i], size); //works
        // NOTE(review): presumably resizing m_image_gpu in place to a different
        // size makes it allocate a new buffer, so it no longer wraps the managed
        // pointer on the next uploadImage() -- confirm.
        cv::cuda::resize(um_images[i].m_image_gpu, um_images[i].m_image_gpu, size); //does not work
    }
}

cv::cuda::resize not working properly with unified memory

I have a class called UnifiedMemoryMat which uses cudaMallocManaged to share memory between a cv::Mat and a cv::cuda::GpuMat. These two are accessible from the class by the user as m_image_cpu and m_image_gpu. My application captures images and then processes them in the shared memory of a UnifiedMemoryMat. I use cv::cuda::resize with umm.m_image_gpu. When I proceed to my next capture, I am not able to upload the new image to my shared memory and the old image stays there. I verify that the image is uploaded properly with if(m_image_gpu.data != m_um_ptr), where m_um_ptr is the void * that I pass to cudaMallocManaged(). This behavior is extra weird because it works properly when I use cv::resize with umm.m_image_cpu. Even weirder, I can get cv::cuda::resize to work properly if earlier in the program I have cv::cuda::GpuMat image_gpu = umm.m_image_gpu. This doesn't cause an upload issue in my UnifiedMemoryMat. The only difference between UnifiedMemoryMat umm and cv::cuda::GpuMat image_gpu is the scope they are declared in. umm is declared in an outer scope so that I don't have to reallocate the memory on each while loop iteration. image_gpu is in the scope of each image capture, but they refer to the same image, so I'm not sure why this issue would happen.

My UnifiedMemoryMat is defined as follows (this is simplified a bit): header:

// Holds a cv::Mat and a cv::cuda::GpuMat that allocateMemory() constructs
// over the same buffer pointer, plus the helpers that set them up and
// refresh them with a new image.
class UnifiedMemoryMat {
private:
    // Buffer pointer that uploadImage() compares against m_image_gpu.data.
    void *m_um_ptr;

public:
    // Host-side header over the buffer.
    cv::Mat m_image_cpu;
    // Device-side header over the buffer.
    cv::cuda::GpuMat m_image_gpu;

    // Allocates the buffer for an image shaped like img; 0 on success, -1 on failure.
    int allocateMemory(cv::Mat img);
    // Uploads img into m_image_gpu; 0 if m_image_gpu still uses the buffer, -1 otherwise.
    int uploadImage(cv::Mat img);
};

definition:

// Allocates a CUDA managed buffer large enough for an image shaped like img
// and points both m_image_cpu and m_image_gpu at that buffer.
//
// img: only its size and type are read; its pixel data is not copied here.
// Returns 0 on success, or -1 if cudaMallocManaged reports an error.
//
// NOTE(review): a buffer obtained by an earlier call is not released here --
// confirm this is only called once per object or free the old buffer first.
int UnifiedMemoryMat::allocateMemory(cv::Mat img) {
    // total() * elemSize() covers every channel at its real depth, so the
    // buffer is also large enough for 16-bit or floating-point images
    // (rows * cols * channels() is only correct for 8-bit data).
    const size_t image_byte_size = img.total() * img.elemSize();

    // Allocate into the member pointer so the headers below and the check in
    // uploadImage() all refer to the same address.
    if(cudaMallocManaged(&m_um_ptr, image_byte_size) != cudaSuccess) {
        return -1;
    }

    // Both headers wrap the caller-provided pointer; neither owns the memory.
    m_image_cpu = cv::Mat(img.size(), img.type(), m_um_ptr);
    m_image_gpu = cv::cuda::GpuMat(img.size(), img.type(), m_um_ptr);
    return 0;
}

// Uploads img into m_image_gpu, then reports whether m_image_gpu still points
// at the buffer held in m_um_ptr.
//
// Returns 0 when m_image_gpu.data equals m_um_ptr after the upload, -1 otherwise.
int UnifiedMemoryMat::uploadImage(cv::Mat img) {
    m_image_gpu.upload(img);

    // NOTE(review): presumably the pointers differ when m_image_gpu was
    // reallocated (by upload() itself or by an earlier call that resized it
    // in place) -- confirm against the OpenCV GpuMat documentation.
    const bool uses_shared_buffer = (m_image_gpu.data == m_um_ptr);
    return uses_shared_buffer ? 0 : -1;
}

pseudocode of application:

// Sketch of the capture/processing loop. The four resize calls in the second
// loop are the variants being compared; the trailing comments record which
// ones behave as expected.
UnifiedMemoryMat um_images[4];

//initialization
//allocate memory for each um_images using UnifiedMemoryMat::allocateMemory();

while(capturing) {
    cv::Mat images[4];
    cv::cuda::GpuMat images_gpu[4];

    //capture images

    for(int i = 0; i < 4; ++i) {
        um_images[i].uploadImage(camera_images); //problem occurs here after the first iteration when using cv::cuda::resize with um_images[i].m_image_gpu
        // Per-iteration headers referring to the same images as um_images[i].
        images[i] = um_images[i].m_image_cpu;
        images_gpu[i] = um_images[i].m_image_gpu;
    }

    //processing

    for(int i = 0; i < 4; ++i) {
        cv::resize(um_images[i].m_image_cpu, um_images[i].m_image_cpu, size); //works
        cv::resize(images[i], images[i], size); //works
        cv::cuda::resize(images_gpu[i], images_gpu[i], size); //works
        // NOTE(review): presumably resizing m_image_gpu in place to a different
        // size makes it allocate a new buffer, so it no longer wraps the managed
        // pointer on the next uploadImage() -- confirm.
        cv::cuda::resize(um_images[i].m_image_gpu, um_images[i].m_image_gpu, size); //does not work
    }
}

cv::cuda::resize not working properly with unified shared memory

I have a class called UnifiedMemoryMat which uses cudaMallocManaged to share memory between a cv::Mat and a cv::cuda::GpuMat. These two are accessible from the class by the user as m_image_cpu and m_image_gpu. My application captures images and then processes them in the shared memory of a UnifiedMemoryMat. I use cv::cuda::resize with umm.m_image_gpu. When I proceed to my next capture, I am not able to upload the new image to my shared memory and the old image stays there. I verify that the image is uploaded properly with if(m_image_gpu.data != m_um_ptr), where m_um_ptr is the void * that I pass to cudaMallocManaged(). This behavior is extra weird because it works properly when I use cv::resize with umm.m_image_cpu. Even weirder, I can get cv::cuda::resize to work properly if earlier in the program I have cv::cuda::GpuMat image_gpu = umm.m_image_gpu. This doesn't cause an upload issue in my UnifiedMemoryMat. The only difference between UnifiedMemoryMat umm and cv::cuda::GpuMat image_gpu is the scope they are declared in. umm is declared in an outer scope so that I don't have to reallocate the memory on each while loop iteration. image_gpu is in the scope of each image capture, but they refer to the same image, so I'm not sure why this issue would happen.

My UnifiedMemoryMat is defined as follows (this is simplified a bit): header:

// Holds a cv::Mat and a cv::cuda::GpuMat that allocateMemory() constructs
// over the same buffer pointer, plus the helpers that set them up and
// refresh them with a new image.
class UnifiedMemoryMat {
private:
    // Buffer pointer that uploadImage() compares against m_image_gpu.data.
    void *m_um_ptr;

public:
    // Host-side header over the buffer.
    cv::Mat m_image_cpu;
    // Device-side header over the buffer.
    cv::cuda::GpuMat m_image_gpu;

    // Allocates the buffer for an image shaped like img; 0 on success, -1 on failure.
    int allocateMemory(cv::Mat img);
    // Uploads img into m_image_gpu; 0 if m_image_gpu still uses the buffer, -1 otherwise.
    int uploadImage(cv::Mat img);
};

definition:

// Allocates a CUDA managed buffer large enough for an image shaped like img
// and points both m_image_cpu and m_image_gpu at that buffer.
//
// img: only its size and type are read; its pixel data is not copied here.
// Returns 0 on success, or -1 if cudaMallocManaged reports an error.
//
// NOTE(review): a buffer obtained by an earlier call is not released here --
// confirm this is only called once per object or free the old buffer first.
int UnifiedMemoryMat::allocateMemory(cv::Mat img) {
    // total() * elemSize() covers every channel at its real depth, so the
    // buffer is also large enough for 16-bit or floating-point images
    // (rows * cols * channels() is only correct for 8-bit data).
    const size_t image_byte_size = img.total() * img.elemSize();

    // Allocate into the member pointer so the headers below and the check in
    // uploadImage() all refer to the same address.
    if(cudaMallocManaged(&m_um_ptr, image_byte_size) != cudaSuccess) {
        return -1;
    }

    // Both headers wrap the caller-provided pointer; neither owns the memory.
    m_image_cpu = cv::Mat(img.size(), img.type(), m_um_ptr);
    m_image_gpu = cv::cuda::GpuMat(img.size(), img.type(), m_um_ptr);
    return 0;
}

// Uploads img into m_image_gpu, then reports whether m_image_gpu still points
// at the buffer held in m_um_ptr.
//
// Returns 0 when m_image_gpu.data equals m_um_ptr after the upload, -1 otherwise.
int UnifiedMemoryMat::uploadImage(cv::Mat img) {
    m_image_gpu.upload(img);

    // NOTE(review): presumably the pointers differ when m_image_gpu was
    // reallocated (by upload() itself or by an earlier call that resized it
    // in place) -- confirm against the OpenCV GpuMat documentation.
    const bool uses_shared_buffer = (m_image_gpu.data == m_um_ptr);
    return uses_shared_buffer ? 0 : -1;
}

pseudocode of application:

// Sketch of the capture/processing loop. The four resize calls in the second
// loop are the variants being compared; the trailing comments record which
// ones behave as expected.
UnifiedMemoryMat um_images[4];

//initialization
//allocate memory for each um_images using UnifiedMemoryMat::allocateMemory();

while(capturing) {
    cv::Mat images[4];
    cv::cuda::GpuMat images_gpu[4];

    //capture images

    for(int i = 0; i < 4; ++i) {
        um_images[i].uploadImage(camera_images); //problem occurs here after the first iteration when using cv::cuda::resize with um_images[i].m_image_gpu
        // Per-iteration headers referring to the same images as um_images[i].
        images[i] = um_images[i].m_image_cpu;
        images_gpu[i] = um_images[i].m_image_gpu;
    }

    //processing

    for(int i = 0; i < 4; ++i) {
        cv::resize(um_images[i].m_image_cpu, um_images[i].m_image_cpu, size); //works
        cv::resize(images[i], images[i], size); //works
        cv::cuda::resize(images_gpu[i], images_gpu[i], size); //works
        // NOTE(review): presumably resizing m_image_gpu in place to a different
        // size makes it allocate a new buffer, so it no longer wraps the managed
        // pointer on the next uploadImage() -- confirm.
        cv::cuda::resize(um_images[i].m_image_gpu, um_images[i].m_image_gpu, size); //does not work
    }
}