How to use parallel_for_ to classify multiple objects using the OpenCV 3.3 dnn module
I would like to run the OpenCV 3.3 dnn module in parallel to speed up object recognition. This is my code:
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/utils/trace.hpp>
using namespace cv;
using namespace cv::dnn;

#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace std;

// Parallel Programming
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
using namespace tbb;

String modelTxt = "caffenet_deploy_2.prototxt";
String modelBin = "caffe_model_2_iter_15000.caffemodel";

static void getMaxClass(Mat &probBlob, int *classId, double *classProb)
{
    Mat probMat = probBlob.reshape(1, 1); // reshape the blob to 1x1000 matrix
    Point classNumber;
    minMaxLoc(probMat, NULL, classProb, NULL, &classNumber);
    *classId = classNumber.x;
}

class Parallel_process : public cv::ParallelLoopBody
{
private:
    std::vector<cv::dnn::experimental_dnn_v1::Net> net;
    int numberofClass;
    double probabilityOfClass;
    std::string PictureName;
    int n;
    vector<int> classId_array_parallel;
    vector<double> classProb_array_parallel;
    vector<cv::String> fnClass;
    Mat prob;

public:
    Parallel_process(vector<cv::String>& fn)
        : fnClass(fn) {}

    void operator()(const cv::Range& range) const
    {
        net = dnn::readNetFromCaffe(modelTxt, modelBin);
        for (int y = range.start(); y < range.end(); y++)
        {
            net = dnn::readNetFromCaffe(modelTxt, modelBin);
            cv::Mat inputIm = cv::imread(fnClass[y]);
            Mat inputBlob = blobFromImage(inputIm, 1.0f, Size(227, 227),
                                          Scalar(104, 117, 123), false);
            net.setInput(inputBlob, "data");
            prob = net.forward("prob");
            getMaxClass(prob, &numberofClass, &probabilityOfClass); // find the best class
            std::cout << "Best class: #" << numberofClass << " '" << ", image = " << fnClass[y] << std::endl;
            std::cout << "Probability: " << probabilityOfClass * 100 << "%" << std::endl;
            //String label = String(classNames[classId_array[k]]);
            //std::cout << "Best class: #" << classId_array_parallel[y] << " '" << ", image = " << fnClass[y] << std::endl;
            //std::cout << "Probability: " << classProb_array_parallel[y] * 100 << "%" << std::endl;
        }
    }
};

int main(int argc, char **argv)
{
    CV_TRACE_FUNCTION();
    String path("Images/*.png");
    vector<cv::String> fn;
    vector<cv::Mat> data;
    cv::glob(path, fn, true);

    int classId;
    double classProb;
    vector<int> classId_array;
    vector<double> classProb_array;
    Mat prob;
    cv::TickMeter t;
    int numberImage = fn.size();

    /// Parallel loop
    parallel_for_(blocked_range(0, numberImage), Parallel_process(fn));

    return 0;
}
$ g++ -o test_1 google_parallel.cpp `pkg-config opencv --cflags --libs`
google_parallel.cpp: In member function ‘void Parallel_process::operator()(const tbb::blocked_range<int>&) const’:
google_parallel.cpp:107:17: error: no match for ‘operator=’ (operand types are ‘const std::vector<cv::dnn::experimental_dnn_v1::Net>’ and ‘cv::dnn::experimental_dnn_v1::Net’)
net = dnn::readNetFromCaffe(modelTxt, modelBin);
^
In file included from /usr/include/c++/5/vector:69:0,
from /usr/local/include/opencv2/dnn/dnn.hpp:45,
from /usr/local/include/opencv2/dnn.hpp:62,
from google_parallel.cpp:42:
/usr/include/c++/5/bits/vector.tcc:167:5: note: candidate: std::vector<_Tp, _Alloc>& std::vector<_Tp, _Alloc>::operator=(const std::vector<_Tp, _Alloc>&) [with _Tp = cv::dnn::experimental_dnn_v1::Net; _Alloc = std::allocator<cv::dnn::experimental_dnn_v1::Net>]
vector<_Tp, _Alloc>::
^
/usr/include/c++/5/bits/vector.tcc:167:5: note: no known conversion for argument 1 from ‘cv::dnn::experimental_dnn_v1::Net’ to ‘const std::vector<cv::dnn::experimental_dnn_v1::Net>&’
google_parallel.cpp:112:17: error: ‘const class std::vector<cv::dnn::experimental_dnn_v1::Net>’ has no member named ‘setInput’
net.setInput(inputBlob, "data");
^
google_parallel.cpp:113:24: error: ‘const class std::vector<cv::dnn::experimental_dnn_v1::Net>’ has no member named ...
@kerollos, please read the error messages. net is a vector. The coding errors might be the least of your problems here.
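To make those compiler errors concrete, here is a minimal sketch of how the operator() body from the question could be written so that it at least compiles: a single local cv::dnn::Net instead of the std::vector<Net> member, no assignments to members from the const body, and cv::Range's start/end used as data members rather than calls. This is only a sketch of the compile-error fix, not an endorsement of the design (see the next answer).

void operator()(const cv::Range& range) const
{
    // One local network per chunk of work; the const body cannot assign to members.
    cv::dnn::Net localNet = cv::dnn::readNetFromCaffe(modelTxt, modelBin);
    for (int y = range.start; y < range.end; y++) // cv::Range exposes start/end as fields
    {
        cv::Mat inputIm = cv::imread(fnClass[y]);
        cv::Mat inputBlob = cv::dnn::blobFromImage(inputIm, 1.0f, cv::Size(227, 227),
                                                   cv::Scalar(104, 117, 123), false);
        localNet.setInput(inputBlob, "data");
        cv::Mat localProb = localNet.forward("prob");

        int classId;
        double classProb;
        getMaxClass(localProb, &classId, &classProb); // same helper as in the question
        std::cout << "Best class: #" << classId << ", image = " << fnClass[y] << std::endl;
        std::cout << "Probability: " << classProb * 100 << "%" << std::endl;
    }
}

In main, cv::parallel_for_ then takes a cv::Range rather than a tbb::blocked_range, e.g. parallel_for_(cv::Range(0, numberImage), Parallel_process(fn)). Note that console output from parallel workers may interleave.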
IMHO, the whole concept does not make sense. dnn::Net is not thread-safe, so you would have to build and load one network graph per image, which will just burn your machine; that is far more expensive than loading one network and classifying your images sequentially.
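For comparison, a minimal sequential sketch that loads the network once and reuses it for every image; the model files, preprocessing parameters, and image path are copied from the question:

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>
#include <vector>

int main()
{
    // Load the network once and reuse it for every image.
    cv::dnn::Net net = cv::dnn::readNetFromCaffe("caffenet_deploy_2.prototxt",
                                                 "caffe_model_2_iter_15000.caffemodel");
    std::vector<cv::String> fn;
    cv::glob("Images/*.png", fn, true);

    for (size_t i = 0; i < fn.size(); i++)
    {
        cv::Mat img = cv::imread(fn[i]);
        cv::Mat blob = cv::dnn::blobFromImage(img, 1.0f, cv::Size(227, 227),
                                              cv::Scalar(104, 117, 123), false);
        net.setInput(blob, "data");
        cv::Mat prob = net.forward("prob");

        // Index of the highest probability, same idea as getMaxClass() in the question.
        cv::Point classNumber;
        double classProb;
        cv::minMaxLoc(prob.reshape(1, 1), NULL, &classProb, NULL, &classNumber);
        std::cout << fn[i] << ": class #" << classNumber.x
                  << " (" << classProb * 100 << "%)" << std::endl;
    }
    return 0;
}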
Another way is to forward a batch of images. For example, a batch of 2 images takes about 1.5x the time of a forward pass over a single image. You can queue frames in one thread and process them in batches in another. So with well-tuned delays you can achieve a higher FPS, I think. A sketch of the batched forward pass follows below.
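A rough sketch of that batching idea, assuming the deploy prototxt tolerates a variable batch size: cv::dnn::blobFromImages packs several images into one N x 3 x 227 x 227 blob, and the "prob" output then has one row of class scores per input image. The queueing/threading part is left out; this only shows a single batched forward pass.

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>
#include <vector>

int main()
{
    cv::dnn::Net net = cv::dnn::readNetFromCaffe("caffenet_deploy_2.prototxt",
                                                 "caffe_model_2_iter_15000.caffemodel");
    std::vector<cv::String> fn;
    cv::glob("Images/*.png", fn, true);

    // Read all images into one batch; a real pipeline would queue frames in one
    // thread and fill fixed-size batches in another, as described above.
    std::vector<cv::Mat> images;
    for (size_t i = 0; i < fn.size(); i++)
        images.push_back(cv::imread(fn[i]));

    // One blob holding the whole batch.
    cv::Mat blob = cv::dnn::blobFromImages(images, 1.0f, cv::Size(227, 227),
                                           cv::Scalar(104, 117, 123), false);
    net.setInput(blob, "data");

    // One row of probabilities per input image.
    cv::Mat prob = net.forward("prob");
    for (int i = 0; i < prob.rows; i++)
    {
        cv::Point classNumber;
        double classProb;
        cv::minMaxLoc(prob.row(i), NULL, &classProb, NULL, &classNumber);
        std::cout << fn[i] << ": class #" << classNumber.x
                  << " (" << classProb * 100 << "%)" << std::endl;
    }
    return 0;
}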