# Revision history [back]

### Problem understanding the code of SE detector

Problem I am trying to understand and hence improve the function predictEdges of structured edge detector from the ximgproc module of opencv_conrib . I have understood the "Structured forests for fast edge detection" paper on the same topic. I know that the function predictEdges pedicts the edges from the information available in the learnt "model" using some ensemble model of the output of various decision trees. But I don't know what each loop of the function is doing ?

Please explain the detectEdges, if possible ..

Also as from my experiments it appears that this implementation of SE detector takes at least 2 seconds for processing each image which may be slow for real time algorithms. So please suggest any improvement if possible

Thanks

### Problem understanding the code of SE detector

Problem I am trying to understand and hence improve the function predictEdges of structured edge detector from the ximgproc module of opencv_conrib . I have understood the "Structured forests for fast edge detection" paper on the same topic. I know that the function predictEdges pedicts the edges from the information available in the learnt "model" using some ensemble model of the output of various decision trees. But I don't know what each loop of the function is doing ?

Please explain the detectEdges, predictEdges, if possible ..

Also as from my experiments it appears that this implementation of SE detector takes at least 2 seconds for processing each image which may be slow for real time algorithms. So please suggest any improvement if possible

Thanks

One approach I am trying is to use openMP parallel for loop, It is almost reducing the runtime to half , but this is not a fundamental optimization. I am looking for other conceptual optimizations even at the cost of accuracy .

void predictEdges(const NChannelsMat &features, cv::Mat &dst) const
{
int shrink = __rf.options.shrinkNumber;
int rfs = __rf.options.regFeatureSmoothingRadius;
int sfs = __rf.options.ssFeatureSmoothingRadius;

int nTreesEval = __rf.options.numberOfTreesToEvaluate;
int nTrees = __rf.options.numberOfTrees;
int nTreesNodes = __rf.numberOfTreeNodes;

const int nchannels = features.channels();
int pSize  = __rf.options.patchSize;

int nFeatures = CV_SQR(pSize/shrink)*nchannels;
int outNum = __rf.options.numberOfOutputChannels;

int stride = __rf.options.stride;
int ipSize = __rf.options.patchInnerSize;
int gridSize = __rf.options.selfsimilarityGridSize;

const int height = cvCeil( double(features.rows*shrink - pSize) / stride );
const int width  = cvCeil( double(features.cols*shrink - pSize) / stride );
// image size in patches with overlapping

//-------------------------------------------------------------------------

NChannelsMat regFeatures = imsmooth(features, cvRound(rfs / float(shrink)));
NChannelsMat  ssFeatures = imsmooth(features, cvRound(sfs / float(shrink)));

NChannelsMat indexes(height, width, CV_MAKETYPE(DataType<int>::type, nTreesEval));

std::vector <int> offsetI(/**/ CV_SQR(pSize/shrink)*nchannels, 0);
for (int i = 0; i < CV_SQR(pSize/shrink)*nchannels; ++i)
{
int z = i / CV_SQR(pSize/shrink);
int y = ( i % CV_SQR(pSize/shrink) )/(pSize/shrink);
int x = ( i % CV_SQR(pSize/shrink) )%(pSize/shrink);

offsetI[i] = x*features.cols*nchannels + y*nchannels + z;
}
// lookup table for mapping linear index to offsets

std::vector <int> offsetE(/**/ CV_SQR(ipSize)*outNum, 0);
for (int i = 0; i < CV_SQR(ipSize)*outNum; ++i)
{
int z = i / CV_SQR(ipSize);
int y = ( i % CV_SQR(ipSize) )/ipSize;
int x = ( i % CV_SQR(ipSize) )%ipSize;

offsetE[i] = x*dst.cols*outNum + y*outNum + z;
}
// lookup table for mapping linear index to offsets

std::vector <int> offsetX( CV_SQR(gridSize)*(CV_SQR(gridSize) - 1)/2 * nchannels, 0);
std::vector <int> offsetY( CV_SQR(gridSize)*(CV_SQR(gridSize) - 1)/2 * nchannels, 0);

int hc = cvRound( (pSize/shrink) / (2.0*gridSize) );
// half of cell
std::vector <int> gridPositions;
for(int i = 0; i < gridSize; i++)
gridPositions.push_back( int( (i+1)*(pSize/shrink + 2*hc - 1)/(gridSize + 1.0) - hc + 0.5f ) );

for (int i = 0, n = 0; i < CV_SQR(gridSize)*nchannels; ++i)
for (int j = (i%CV_SQR(gridSize)) + 1; j < CV_SQR(gridSize); ++j, ++n)
{
int z = i / CV_SQR(gridSize);

int x1 = gridPositions[i%CV_SQR(gridSize)%gridSize];
int y1 = gridPositions[i%CV_SQR(gridSize)/gridSize];

int x2 = gridPositions[j%gridSize];
int y2 = gridPositions[j/gridSize];

offsetX[n] = x1*features.cols*nchannels + y1*nchannels + z;
offsetY[n] = x2*features.cols*nchannels + y2*nchannels + z;
}
// lookup tables for mapping linear index to offset pairs

for (int i = 0; i < height; ++i)
{
float *regFeaturesPtr = regFeatures.ptr<float>(i*stride/shrink);
float  *ssFeaturesPtr = ssFeatures.ptr<float>(i*stride/shrink);

int *indexPtr = indexes.ptr<int>(i);

for (int j = 0, k = 0; j < width; ++k, j += !(k %= nTreesEval))
// for j,k in [0;width)x[0;nTreesEval)
{
int baseNode = ( ((i + j)%(2*nTreesEval) + k)%nTrees )*nTreesNodes;
int currentNode = baseNode;
// select root node of the tree to evaluate

int offset = (j*stride/shrink)*nchannels;
while ( __rf.childs[currentNode] != 0 )
{
int currentId = __rf.featureIds[currentNode];
float currentFeature;

if (currentId >= nFeatures)
{
int xIndex = offsetX[currentId - nFeatures];
float A = ssFeaturesPtr[offset + xIndex];

int yIndex = offsetY[currentId - nFeatures];
float B = ssFeaturesPtr[offset + yIndex];

currentFeature = A - B;
}
else
currentFeature = regFeaturesPtr[offset + offsetI[currentId]];

// compare feature to threshold and move left or right accordingly
if (currentFeature < __rf.thresholds[currentNode])
currentNode = baseNode + __rf.childs[currentNode] - 1;
else
currentNode = baseNode + __rf.childs[currentNode];
}

indexPtr[j*nTreesEval + k] = currentNode;
}
}

NChannelsMat dstM(dst.size(),
CV_MAKETYPE(DataType<float>::type, outNum));
dstM.setTo(0);

float step = 2.0f * CV_SQR(stride) / CV_SQR(ipSize) / nTreesEval;
for (int i = 0; i < height; ++i)
{
int *pIndex = indexes.ptr<int>(i);
float *pDst = dstM.ptr<float>(i*stride);

for (int j = 0, k = 0; j < width; ++k, j += !(k %= nTreesEval))
{// for j,k in [0;width)x[0;nTreesEval)

int currentNode = pIndex[j*nTreesEval + k];

int start  = __rf.edgeBoundaries[currentNode];
int finish = __rf.edgeBoundaries[currentNode + 1];

if (start == finish)
continue;

int offset = j*stride*outNum;
for (int p = start; p < finish; ++p)
pDst[offset + offsetE[__rf.edgeBins[p]]] += step;
}
}

cv::reduce( dstM.reshape(1, int( dstM.total() ) ), dstM, 2, CV_REDUCE_SUM);
imsmooth( dstM.reshape(1, dst.rows), 1 ).copyTo(dst);
}


### Problem understanding the code of SE detector

Problem I am trying to understand and hence improve the function predictEdges of structured edge detector from the ximgproc module of opencv_conrib . I have understood the "Structured forests for fast edge detection" paper on the same topic. I know that the function predictEdges pedicts the edges from the information available in the learnt "model" using some ensemble model of the output of various decision trees. But I don't know what each loop of the function is doing ?

Please explain the predictEdges, if possible ..

Also as from my experiments it appears that this implementation of SE detector takes at least 2 seconds for processing each image which may be slow for real time algorithms. So please suggest any improvement if possible

Thanks

One approach I am trying is to use openMP parallel for loop, It is almost reducing the runtime to half , but this is not a fundamental optimization. I am looking for other conceptual optimizations even at the cost of accuracy .

void predictEdges(const NChannelsMat &features, cv::Mat &dst) const
{
int shrink = __rf.options.shrinkNumber;
int rfs = __rf.options.regFeatureSmoothingRadius;
int sfs = __rf.options.ssFeatureSmoothingRadius;

int nTreesEval = __rf.options.numberOfTreesToEvaluate;
int nTrees = __rf.options.numberOfTrees;
int nTreesNodes = __rf.numberOfTreeNodes;

const int nchannels = features.channels();
int pSize  = __rf.options.patchSize;

int nFeatures = CV_SQR(pSize/shrink)*nchannels;
int outNum = __rf.options.numberOfOutputChannels;

int stride = __rf.options.stride;
int ipSize = __rf.options.patchInnerSize;
int gridSize = __rf.options.selfsimilarityGridSize;

const int height = cvCeil( double(features.rows*shrink - pSize) / stride );
const int width  = cvCeil( double(features.cols*shrink - pSize) / stride );
// image size in patches with overlapping

//-------------------------------------------------------------------------

NChannelsMat regFeatures = imsmooth(features, cvRound(rfs / float(shrink)));
NChannelsMat  ssFeatures = imsmooth(features, cvRound(sfs / float(shrink)));

NChannelsMat indexes(height, width, CV_MAKETYPE(DataType<int>::type, nTreesEval));

std::vector <int> offsetI(/**/ CV_SQR(pSize/shrink)*nchannels, 0);
for (int i = 0; i < CV_SQR(pSize/shrink)*nchannels; ++i)
{
int z = i / CV_SQR(pSize/shrink);
int y = ( i % CV_SQR(pSize/shrink) )/(pSize/shrink);
int x = ( i % CV_SQR(pSize/shrink) )%(pSize/shrink);

offsetI[i] = x*features.cols*nchannels + y*nchannels + z;
}
// lookup table for mapping linear index to offsets

std::vector <int> offsetE(/**/ CV_SQR(ipSize)*outNum, 0);
for (int i = 0; i < CV_SQR(ipSize)*outNum; ++i)
{
int z = i / CV_SQR(ipSize);
int y = ( i % CV_SQR(ipSize) )/ipSize;
int x = ( i % CV_SQR(ipSize) )%ipSize;

offsetE[i] = x*dst.cols*outNum + y*outNum + z;
}
// lookup table for mapping linear index to offsets

std::vector <int> offsetX( CV_SQR(gridSize)*(CV_SQR(gridSize) - 1)/2 * nchannels, 0);
std::vector <int> offsetY( CV_SQR(gridSize)*(CV_SQR(gridSize) - 1)/2 * nchannels, 0);

int hc = cvRound( (pSize/shrink) / (2.0*gridSize) );
// half of cell
std::vector <int> gridPositions;
for(int i = 0; i < gridSize; i++)
gridPositions.push_back( int( (i+1)*(pSize/shrink + 2*hc - 1)/(gridSize + 1.0) - hc + 0.5f ) );

for (int i = 0, n = 0; i < CV_SQR(gridSize)*nchannels; ++i)
for (int j = (i%CV_SQR(gridSize)) + 1; j < CV_SQR(gridSize); ++j, ++n)
{
int z = i / CV_SQR(gridSize);

int x1 = gridPositions[i%CV_SQR(gridSize)%gridSize];
int y1 = gridPositions[i%CV_SQR(gridSize)/gridSize];

int x2 = gridPositions[j%gridSize];
int y2 = gridPositions[j/gridSize];

offsetX[n] = x1*features.cols*nchannels + y1*nchannels + z;
offsetY[n] = x2*features.cols*nchannels + y2*nchannels + z;
}
// lookup tables for mapping linear index to offset pairs

for (int i = 0; i < height; ++i)
{
float *regFeaturesPtr = regFeatures.ptr<float>(i*stride/shrink);
float  *ssFeaturesPtr = ssFeatures.ptr<float>(i*stride/shrink);

int *indexPtr = indexes.ptr<int>(i);

for (int j = 0, k = 0; j < width; ++k, j += !(k %= nTreesEval))
// for j,k in [0;width)x[0;nTreesEval)
{
int baseNode = ( ((i + j)%(2*nTreesEval) + k)%nTrees )*nTreesNodes;
int currentNode = baseNode;
// select root node of the tree to evaluate

int offset = (j*stride/shrink)*nchannels;
while ( __rf.childs[currentNode] != 0 )
{
int currentId = __rf.featureIds[currentNode];
float currentFeature;

if (currentId >= nFeatures)
{
int xIndex = offsetX[currentId - nFeatures];
float A = ssFeaturesPtr[offset + xIndex];

int yIndex = offsetY[currentId - nFeatures];
float B = ssFeaturesPtr[offset + yIndex];

currentFeature = A - B;
}
else
currentFeature = regFeaturesPtr[offset + offsetI[currentId]];

// compare feature to threshold and move left or right accordingly
if (currentFeature < __rf.thresholds[currentNode])
currentNode = baseNode + __rf.childs[currentNode] - 1;
else
currentNode = baseNode + __rf.childs[currentNode];
}

indexPtr[j*nTreesEval + k] = currentNode;
}
}

NChannelsMat dstM(dst.size(),
CV_MAKETYPE(DataType<float>::type, outNum));
dstM.setTo(0);

float step = 2.0f * CV_SQR(stride) / CV_SQR(ipSize) / nTreesEval;
for (int i = 0; i < height; ++i)
{
int *pIndex = indexes.ptr<int>(i);
float *pDst = dstM.ptr<float>(i*stride);

for (int j = 0, k = 0; j < width; ++k, j += !(k %= nTreesEval))
{// for j,k in [0;width)x[0;nTreesEval)

int currentNode = pIndex[j*nTreesEval + k];

int start  = __rf.edgeBoundaries[currentNode];
int finish = __rf.edgeBoundaries[currentNode + 1];

if (start == finish)
continue;

int offset = j*stride*outNum;
for (int p = start; p < finish; ++p)
pDst[offset + offsetE[__rf.edgeBins[p]]] += step;
}
}

cv::reduce( dstM.reshape(1, int( dstM.total() ) ), dstM, 2, CV_REDUCE_SUM);
imsmooth( dstM.reshape(1, dst.rows), 1 ).copyTo(dst);
}