so, the answer is probably the same as always: rather use opencv's internal parallelization than trying to multithread your own (see the small sketch right below). here's dkurt's idea with the batches. also, i don't think you need a 2nd thread for loading the images, but let's do it anyway.
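(a minimal sketch of mine, not part of the original program, just to show what "internal parallelization" means here: the core module lets you inspect and cap opencv's own worker pool.)

// sketch: inspect / limit opencv's internal thread pool
#include <opencv2/core/utility.hpp>
#include <iostream>

int main()
{
    std::cout << "opencv worker threads: " << cv::getNumThreads() << std::endl;
    cv::setNumThreads(4); // cap the pool, e.g. to leave a core free for your own loader thread
    return 0;
}

now, the modified program: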
// modified caffe_googlenet.cpp
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
#include <thread>
using namespace std;
static std::vector<String> readClassNames(const char *filename = "synset_words.txt")
{
    std::vector<String> classNames;
    std::ifstream fp(filename);
    if (!fp.is_open())
    {
        std::cerr << "File with classes labels not found: " << filename << std::endl;
        exit(-1);
    }
    std::string name;
    while (std::getline(fp, name))
    {
        if (name.length())
            classNames.push_back(name.substr(name.find(' ') + 1));
    }
    fp.close();
    return classNames;
}
void makeBatch(const vector<String> &names, vector<Mat> &batch, size_t from, size_t to)
{
    batch.clear();
    for (size_t i = from; i < min(to, names.size()); i++)
    {
        Mat img = imread(names[i]);
        if (img.empty())
            continue;
        // order of operations matters !
        // we can't simply resize() our image,
        // since we have to emulate the default crop=true option
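        // (note: newer dnn versions also expose this via blobFromImage's crop flag,
        //  e.g. blobFromImage(img, 1.0, Size(224,224), Scalar(104,117,123), false, true),
        //  check your version; doing it by hand here also lets it run on the loader thread)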
        Size size(224, 224);
        float resizeFactor = std::max(size.width  / (float)img.cols,
                                      size.height / (float)img.rows);
        resize(img, img, Size(), resizeFactor, resizeFactor);
        Rect crop(Point(0.5 * (img.cols - size.width),
                        0.5 * (img.rows - size.height)), size);
        img = img(crop);
        img.convertTo(img, CV_32F);
        img -= Scalar(104, 117, 123); // subtract mean
        batch.push_back(img);
    }
}
int main(int argc, char **argv)
{
    String modelTxt = "bvlc_googlenet.prototxt";
    String modelBin = "bvlc_googlenet.caffemodel";
    String imageDir = (argc > 1) ? argv[1] : "C:\\data\\img\\cache\\1";
    vector<String> images;
    glob(imageDir, images);
    cout << images.size() << " images in " << imageDir << endl;
    if (images.empty())
    {
        cerr << "no images found, nothing to do." << endl;
        return -1;
    }
    vector<String> classNames = readClassNames();
    Net net = dnn::readNetFromCaffe(modelTxt, modelBin);
    size_t batchsize = 8;
    size_t from = 0;
    size_t to = batchsize;
    cv::TickMeter t;
    // we have to load the 1st batch "manually", so the loader thread
    // always runs one batch ahead of the network
    vector<Mat> batch;
    thread runner(makeBatch, std::ref(images), std::ref(batch), from, to);
    while (from < images.size())
    {
        // wait for our images
        runner.join();
        size_t numLoaded = batch.size(); // remember it, the loader will overwrite `batch`
        // we've done the preprocessing already.
        Mat inputBlob = blobFromImages(batch, 1.0, Size(), Scalar(), false);
        // start the next round, so loading batch N+1 overlaps with inference on batch N
        from += batchsize;
        to += batchsize;
        if (from < images.size())
            runner = thread(makeBatch, std::ref(images), std::ref(batch), from, to);
        net.setInput(inputBlob, "data");
        t.start();
        Mat prob = net.forward("prob");
        t.stop();
        // each prediction is a row in the prob Mat
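        // (for a batch, prob is 2d: one row per image, one column per class score)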
        for (size_t i = 0; i < numLoaded; i++)
        {
            Point classNumber; double classProb;
            minMaxLoc(prob.row(i), NULL, &classProb, NULL, &classNumber);
            int classId = classNumber.x;
            std::cout << "'" << classNames.at(classId) << "'";
            std::cout << " (" << classProb * 100 << "%)" << std::endl;
        }
    }
std::cout << "Time: " << (double)t.getTimeMilli() / (batchsize * t.getCounter()) << " ms (average from " << t.getCounter() << " * " << batchsize << " iterations)" << std::endl;
return 0;
}