OpenCV parallel_for_ splits ranges for single item

asked 2019-10-04 06:35:32 -0500

Humam Helfawi gravatar image

I am running this tutorial example:

int mandelbrot(const std::complex<float> &z0, const int max) {
    std::complex<float> z = z0;
    for(int t = 0; t < max; t++) {
        if(z.real()*z.real() + z.imag()*z.imag() > 4.0f) return t;
        z = z * z + z0;
    }
    return max;
}

int mandelbrotFormula(const std::complex<float> &z0, const int maxIter = 500) {
    int value = mandelbrot(z0, maxIter);
    if(maxIter - value == 0) {
        return 0;
    }
    return cvRound(sqrt(value / (float)maxIter) * 255);
}

void test(){
    cv::Mat mandelbrotImg(4800, 5400, CV_8U);
    float x1 = -2.1f, x2 = 0.6f;
    float y1 = -1.2f, y2 = 1.2f;
    float scaleX = mandelbrotImg.cols / (x2 - x1);
    float scaleY = mandelbrotImg.rows / (y2 - y1);
    cv::parallel_for_(cv::Range(0, mandelbrotImg.rows*mandelbrotImg.cols), [&](const cv::Range& range) {
        for(int r = range.start; r < range.end; r++) {
            int i = r / mandelbrotImg.cols;
            int j = r % mandelbrotImg.cols;
            float x0 = j / scaleX + x1;
            float y0 = i / scaleY + y1;
            std::complex<float> z0(x0, y0);
            uchar value = (uchar)mandelbrotFormula(z0);
            mandelbrotImg.ptr<uchar>(i)[j] = value;
        }
    });
}

If I call cv::setNumThreads(0);, the code run in single call and range is between 0 and number of pixels. This is correct behavior. However, when I run with cv::setNumThreads(4);, the lambda is called N times(where N is the number of the pixels) and range is always single item (e.g. [0,1] , [1,2] ...).

This makes the parallel version much slower than the serial version (10x slower).

Any idea why this is happening?

edit retag flag offensive close merge delete