Ask Your Question

how to use parallel_for_ to classify multiple objects by using Opencv3.3 dnn

asked 2017-10-27 05:00:41 -0600

kerollos gravatar image

updated 2017-10-28 04:54:42 -0600

berak gravatar image

I would like to run the dnn module of OpenCV 3.3 in parallel to speed up object recognition. This is my code:

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/utils/trace.hpp>
using namespace cv;
using namespace cv::dnn;

#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace std;

// Parallel Programming

#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
using namespace tbb;
String modelTxt = "caffenet_deploy_2.prototxt";
String modelBin = "caffe_model_2_iter_15000.caffemodel";

/* Find the best class for a blob, i.e. the class with the maximal probability.
 *
 * probBlob  - network output; it is flattened to one single-channel row first.
 * classId   - receives the column index of the maximum.
 * classProb - receives the maximum value itself.
 */
static void getMaxClass( Mat &probBlob, int *classId, double *classProb)
{
    Mat probMat = probBlob.reshape(1, 1); // 1 channel, 1 row: a 1xN matrix
    Point classNumber;

    // only the maximum and its location are requested; the minimum is ignored
    minMaxLoc(probMat, NULL, classProb, NULL, &classNumber);
    *classId = classNumber.x;
}

class Parallel_process : public cv::ParallelLoopBody


    std::vector<cv::dnn::experimental_dnn_v1::Net> net ;
    int numberofClass;
    double probabilityOfClass;
    std::string PictureName;
    int n;
    vector<int> classId_array_parallel;
    vector<double> classProb_array_parallel;
    vector<cv::String> fnClass;
    Mat prob;


    Parallel_process(vector<cv::String>& fn)
        : fnClass(fn){}
    void operator()(const cv::Range& range) const
        net = dnn::readNetFromCaffe(modelTxt, modelBin);
        for(int y = range.start(); y < range.end(); y++)
            net = dnn::readNetFromCaffe(modelTxt, modelBin);
            cv::Mat inputIm = cv::imread(fnClass[y]);
            Mat inputBlob = blobFromImage(inputIm, 1.0f, Size(227, 227),
                                          Scalar(104, 117, 123),false);

            net.setInput(inputBlob, "data");
            prob = net.forward("prob");
            getMaxClass(prob, &numberofClass, &probabilityOfClass);//find the best class
            std::cout << "Best class: #" << numberofClass << " '" << ",  image = " << fnClass[y] <<std::endl;
            std::cout << "Probability: " << probabilityOfClass * 100 << "%" << std::endl;
            //String label = String(classNames[classId_array[k]]);
            //std::cout << "Best class: #" << classId_array_parallel[y] << " '" << ",  image = " << fnClass[y] <<std::endl;
            //std::cout << "Probability: " << classProb_array_parallel[y] * 100 << "%" << std::endl;


int main(int argc, char **argv)


    String path("Images/*.png"); 
    vector<cv::String> fn;
    vector<cv::Mat> data;

    int classId;
    double classProb;
    vector<int> classId_array;
    vector<double> classProb_array;

    Mat prob;
    cv::TickMeter t;
    int numberImage = fn.size();

    /// Parallel loop

    parallel_for_(blocked_range(0,numberImage), Parallel_process(fn));

    return 0;

$ g++ -o test_1 google_parallel.cpp `pkg-config opencv --cflags --libs`
google_parallel.cpp: In member function ‘void Parallel_process::operator()(const tbb::blocked_range<int>&) const’:
google_parallel.cpp:107:17: error: no match for ‘operator=’ (operand types are ‘const std::vector<cv::dnn::experimental_dnn_v1::Net>’ and ‘cv::dnn::experimental_dnn_v1::Net’)
             net = dnn::readNetFromCaffe(modelTxt, modelBin);
In file included from /usr/include/c++/5/vector:69:0,
                 from /usr/local/include/opencv2/dnn/dnn.hpp:45,
                 from /usr/local/include/opencv2/dnn.hpp:62,
                 from google_parallel.cpp:42:
/usr/include/c++/5/bits/vector.tcc:167:5: note: candidate: std::vector<_Tp, _Alloc>& std::vector<_Tp, _Alloc>::operator=(const std::vector<_Tp, _Alloc>&) [with _Tp = cv::dnn::experimental_dnn_v1::Net; _Alloc = std::allocator<cv::dnn::experimental_dnn_v1::Net>]
     vector<_Tp, _Alloc>::
/usr/include/c++/5/bits/vector.tcc:167:5: note:   no known conversion for argument 1 from ‘cv::dnn::experimental_dnn_v1::Net’ to ‘const std::vector<cv::dnn::experimental_dnn_v1::Net>&’
google_parallel.cpp:112:17: error: ‘const class std::vector<cv::dnn::experimental_dnn_v1::Net>’ has no member named ‘setInput’
             net.setInput(inputBlob, "data");
google_parallel.cpp:113:24: error: ‘const class std::vector<cv::dnn::experimental_dnn_v1::Net>’ has no member named ...
edit retag flag offensive close merge delete


@kerollos, please read error messages. net is a vector.

dkurt gravatar imagedkurt ( 2017-10-27 06:25:37 -0600 )edit

the coding errors might be the least of your problems here.

IMHO, the whole concept does not make any sense. dnn::Net is not thread-safe, so you would have to build and load one network graph per image, which will just burn your machine. That is for sure much more expensive than loading one network and checking your images sequentially.

berak gravatar imageberak ( 2017-10-28 10:34:14 -0600 )edit

One more way is to forward a batch of images. For example, a forward pass of a batch of 2 images takes about 1.5x the time of a forward pass of a single image. You can queue frames in one thread and process them in batches in another one. So, with well-tuned delays, you can achieve more FPS, I think.

dkurt gravatar imagedkurt ( 2017-10-28 22:31:22 -0600 )edit

1 answer

Sort by » oldest newest most voted

answered 2017-10-29 09:55:37 -0600

berak gravatar image

So, the answer is probably the same as always: rather use OpenCV's internal parallelization than trying to multithread it on your own. Here's dkurt's idea with the batches. Also, I don't think you need a second thread for loading the images, but let's do it anyway:

// modified caffe_googlenet.cpp

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;

#include <fstream>
#include <iostream>
#include <cstdlib>
#include <thread>
using namespace std;

static std::vector<String> readClassNames(const char *filename = "synset_words.txt")
    std::vector<String> classNames;

    std::ifstream fp(filename);
    if (!fp.is_open())
        std::cerr << "File with classes labels not found: " << filename << std::endl;

    std::string name;
    while (!fp.eof())
        std::getline(fp, name);
        if (name.length())
            classNames.push_back( name.substr(name.find(' ')+1) );

    return classNames;

void makeBatch(const vector<String> &names, vector<Mat> &batch, size_t from, size_t to)
    for (size_t i=from; i<min(to,names.size()); i++)
        Mat img = imread(names[i]);
        if (img.empty())
        // order of operations matters !
        // we can't simply resize() our image, 
        // since we have to emulate the default crop=true option
        Size size(224,224);
        float resizeFactor = std::max(size.width  / (float)img.cols,
                                      size.height / (float)img.rows);
        resize(img, img, Size(), resizeFactor, resizeFactor);
        Rect crop(Point(0.5 * (img.cols - size.width),
                        0.5 * (img.rows - size.height)), size);
        img = img(crop);
        img.convertTo(img, CV_32F);
        img -= Scalar(104, 117, 123); // subtract mean

int main(int argc, char **argv)
    String modelTxt = "bvlc_googlenet.prototxt";
    String modelBin = "bvlc_googlenet.caffemodel";
    String imageDir = (argc > 1) ? argv[1] : "C:\\data\\img\\cache\\1";

    vector<String> images;
    glob(imageDir, images);
    cout << images.size() << " images on " << imageDir << endl;

    vector<String> classNames = readClassNames();

    Net net = dnn::readNetFromCaffe(modelTxt, modelBin);

    int batchsize = 8;
    int from = 0;
    int to = batchsize;
    cv::TickMeter t;

    // we have to run the 1st batch "manually"
    vector<Mat> batch;
    thread runner(makeBatch, std::ref(images), std::ref(batch), from, to);

    while (to < images.size())
        // wait for our images

        // we've done the preprocessing already.
        Mat inputBlob = blobFromImages(batch, 1.0, Size(), Scalar(), false);

        // start next round
        from += batchsize;
        to += batchsize;
        if (to<images.size())
            runner = thread(makeBatch, std::ref(images), std::ref(batch), from, to);

        net.setInput(inputBlob, "data");
        Mat prob = net.forward("prob");

        // each prediction is a row in the prob Mat
        for (size_t i=0; i<batch.size(); i++)
            Point classNumber; double classProb;

            minMaxLoc(prob.row(i), NULL, &classProb, NULL, &classNumber);
            int classId = classNumber.x;

            std::cout << "'" << << "'";
            std::cout << " (" << classProb * 100 << "%)" << std::endl;
    std::cout << "Time: " << (double)t.getTimeMilli() / (batchsize * t.getCounter()) << " ms (average from " << t.getCounter() << " * " << batchsize << " iterations)" << std::endl;
    return 0;
edit flag offensive delete link more

Question Tools

1 follower


Asked: 2017-10-27 05:00:41 -0600

Seen: 2,080 times

Last updated: Oct 29 '17