I made an attempt to solve this using parallel_for_, and it seems to work. At the top of cascadeclassifier.cpp I added:
class Parallel_predict : public cv::ParallelLoopBody
{
private:
    const vector< Ptr<CvCascadeBoost> >& v;
    int idx;
    int* result;
public:
    Parallel_predict( const vector< Ptr<CvCascadeBoost> >& vectorToProcess, int i, int* r )
        : v(vectorToProcess), idx(i), result(r) { *result = 1; }

    virtual void operator()( const cv::Range& r ) const
    {
        // Run the stage classifiers of this sub-range; stop as soon as any
        // thread has already rejected the sample (*result cleared to 0).
        for ( int i = r.start; (i != r.end) && (*result == 1); ++i )
        {
            if ( v[i]->predict(idx) == 0.f )
            {
                *result = 0;
                return;
            }
        }
    }
};
Then, also in cascadeclassifier.cpp, I updated the int CvCascadeClassifier::predict( int sampleIdx ) method to look like this:
int CvCascadeClassifier::predict( int sampleIdx )
{
    CV_DbgAssert( sampleIdx < numPos + numNeg );
    int result;
    Parallel_predict p( stageClassifiers, sampleIdx, &result );
    cv::parallel_for_( cv::Range(0, (int)stageClassifiers.size()), p );
    return result;

    /* OLD CODE
    for ( vector< Ptr<CvCascadeBoost> >::iterator it = stageClassifiers.begin();
          it != stageClassifiers.end(); it++ )
    {
        if ( (*it)->predict( sampleIdx ) == 0.f )
            return 0;
    }
    return 1; */
}
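One thing to be aware of: with the code above, several worker threads can write 0 into *result at the same time. In practice that is harmless because every writer stores the same value, but strictly speaking concurrent writes to a plain int are a data race. Below is a minimal sketch of how the flag could be made a std::atomic<int> instead, assuming a C++11-capable compiler (which a stock traincascade build may not require):

#include <atomic>

class Parallel_predict : public cv::ParallelLoopBody
{
private:
    const vector< Ptr<CvCascadeBoost> >& v;
    int idx;
    std::atomic<int>* result;   // shared early-exit flag, now atomic
public:
    Parallel_predict( const vector< Ptr<CvCascadeBoost> >& vectorToProcess, int i, std::atomic<int>* r )
        : v(vectorToProcess), idx(i), result(r) { result->store(1); }

    virtual void operator()( const cv::Range& r ) const
    {
        for ( int i = r.start; i != r.end && result->load() == 1; ++i )
        {
            if ( v[i]->predict(idx) == 0.f )
            {
                result->store(0);   // any thread may clear the flag
                return;
            }
        }
    }
};

In predict() the local variable then becomes std::atomic<int> result; and the return statement becomes return result.load(); everything else stays the same.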
I have only tried this on OSX. It seems to work alright, and it definitely uses all of my cores :) There is, however, more code that could be parallelized (like in fillPassedSamples), but it is not obvious how to attack it.
/MB