Is CUDA GoodFeaturesToTrackDetector not thread-safe?

asked 2020-03-20 06:33:40 -0500

AlexBn gravatar image

Hello, I use OpenCV 4.2 with CUDA 10.2 and Visual Studio 2019 on Windows 10. While using the OpenCV CUDA GoodFeaturesToTrackDetector in a parallel loop, I noticed that I systematically get the exception "merge_sort: failed to synchronize", even though each thread runs it on different cuda::GpuMats, in a separate cuda::Stream, and with a separate Algorithm instance.

I reduced my code to a minimal reproducible example:

#include <iostream>
#include <list>
#include <thread>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/cvconfig.h>
#include <opencv2/opencv.hpp>
#if defined(HAVE_CUDA)
#include <opencv2/core/cuda.hpp>
#include <opencv2/core/cuda/common.hpp>
#include <opencv2/cudaimgproc.hpp>
#include <opencv2/cudaoptflow.hpp>
#include <opencv2/cudaarithm.hpp>
#endif
#include <thread>
#include <vector>

using namespace std; using namespace cv;

// Minimal reproduction: repeatedly spawns NBThread worker threads, each of which
// creates its own CUDA GoodFeaturesToTrackDetector, its own cuda::Stream and its
// own GpuMats, runs corner detection on a private copy of the same grayscale
// image, and prints the size of the resulting corner matrix.
//
// Returns 0 when all iterations complete, 1 if the input image cannot be read.
int main() {
    int NBThread = 5;
    Mat frames = imread("C:\\Users\\alex\\Desktop\\test.png");
    // imread yields an empty Mat when the file is missing or unreadable; stop
    // here with a clear message instead of passing an empty image to cvtColor.
    if (frames.empty()) {
        std::cerr << "Could not read input image" << std::endl;
        return 1;
    }
    cvtColor(frames, frames, COLOR_BGR2GRAY);

    // One independent host-side copy of the image per worker thread.
    vector<Mat> vectImg;
    for (int u = 0; u < NBThread; u++)
        vectImg.push_back(frames.clone());

    for (int i = 0; i < 100000; i++)
    {
        vector<thread> workers;
        for (int id = 0; id < NBThread; ++id)
            // vectImg is captured by reference (all workers are joined before
            // it goes out of scope); id is captured by value.
            workers.emplace_back([&, id]()
                {
                    Size frameSize = vectImg[id].size();
                    // Create the detector
                    int     srcType = CV_8UC1;
                    int     maxCorners = /*1000*/   4000;
                    double  qualityLevel = /*0.01*/ 0.01;
                    double  minDistance = /*0.0*/   0.0;
                    int     blockSize = /*3*/       3;
                    bool    useHarrisDetector = /*false*/   false;
                    double  harrisK = /*0.04*/  0.04;
                    auto m_CudaDetector = cv::cuda::createGoodFeaturesToTrackDetector(srcType, maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector, harrisK);

                    // Upload and detection are both issued on this thread's own stream.
                    cuda::Stream stream;
                    cuda::GpuMat gpuFrame = cuda::GpuMat(frameSize, CV_8UC1);
                    gpuFrame.upload(vectImg[id], stream);
                    cv::cuda::GpuMat d_prevRef;
                    m_CudaDetector->detect(gpuFrame, d_prevRef, cuda::GpuMat(), stream);
                    // Block until the work queued on this stream has finished
                    // before reading the result's size.
                    stream.waitForCompletion();

                    std::cout << " Nombre de points detect = " << d_prevRef.size() << " thread : " << id << std::endl;
                });

        // Wait for every worker of this iteration before starting the next one.
        for (auto& worker : workers) worker.join();
    }
    return 0;
}

After many loop iterations I get an exception with the following call stack:

  • opencv_cudaimgproc420d.dll!thrust::cuda_cub::throw_on_error(cudaError status, const char * msg) Line 227 C++
  • opencv_cudaimgproc420d.dll!thrust::cuda_cub::__merge_sort::merge_sort .... Line 1318 C++
  • opencv_cudaimgproc420d.dll!thrust::cuda_cub::__smart_sort::smart_sort ... Line 1552 C++
  • opencv_cudaimgproc420d.dll!thrust::cuda_cub::sort ... Line 1631 C++
  • opencv_cudaimgproc420d.dll!thrust::sort ... Line 57 C++
  • opencv_cudaimgproc420d.dll!cv::cuda::device::gfft::sortCorners_gpu(cv::cuda::PtrStepSz<float> eig, float2 * corners,int count, CUstream_st * stream) Line 139 C++
  • opencv_cudaimgproc420d.dll!`anonymous namespace'::GoodFeaturesToTrackDetector::detect(const cv::debug_build_guard::_InputArray & _image, const cv::debug_build_guard::_OutputArray & _corners, const cv::debug_build_guard::_InputArray & _mask, cv::cuda::Stream & stream) Line 125 C++

Must I conclude that the OpenCV CUDA GoodFeaturesToTrackDetector is not thread-safe despite the use of separate Streams?

edit retag flag offensive close merge delete