When tracking an object, I want to be able to re-detect it after an occlusion.
Using OpenCV 3.4.5 (C++), I tried template matching and optical flow segmentation. Now I would like to implement a more robust algorithm based on the HOG descriptor.
I made a small example to show the problem. Here are my two images:
here the vehicle I want to detect
the image in which I'm searching
PS: I don't want to train an SVM, since I only want to detect one unique object over a few frames.
My code :
#include <opencv2/core/utility.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/objdetect.hpp>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <limits>
#include <vector>
using namespace std;
using namespace cv;
int main(int argc, char** argv){
//load images
Mat lastSeenObject=imread("lastSeenObject.png",1); //21x39
Mat patch=imread("patch.png",1); //150x150
//params
Size cellSize(8,8);
int nbins= 9;
Size blockSize(2,2);
//my variables
vector<float>templ_descriptor;
vector<float>p_descriptor;
Mat templ_gray,p_gray,iMatches;
vector<DMatch> matches;
//convert to gray
cvtColor(lastSeenObject,templ_gray,CV_BGR2GRAY);
cvtColor(patch,p_gray,CV_BGR2GRAY);
//create hog object
HOGDescriptor hog(Size(templ_gray.cols/cellSize.width*cellSize.width,templ_gray.rows/cellSize.height*cellSize.height),
Size(blockSize.height*cellSize.height,blockSize.width*cellSize.width),
Size(cellSize.height,cellSize.width),
cellSize,
nbins);
// gives --> winSize [32 x 16], blockSize [16 x 16], blockStride [8 x 8], cellSize [8 x 8]
//compute the descriptor of the car
hog.compute(templ_gray,templ_descriptor, Size(cellSize.height,cellSize.width), Size( 0, 0 ));
//templ_descriptor.size() = 108, containing floats between 0 and 1
//compute the descriptor of the patch
hog.compute(p_gray,p_descriptor, Size(cellSize.height,cellSize.width), Size( 0, 0 ));
//p_descriptor.size() = 27540, containing floats between 0 and 1
//compare the descriptors
double err=0;
double min_err = -1;
int idx=-1;
for (unsigned int i =0;i<p_descriptor.size();i++)
{
if(i%templ_descriptor.size()==0 && i!=0) // iterate the computation of error over the templ_descriptor size
{
if(err<min_err || min_err ==-1)
{
min_err = err;
idx = i-nbins;
}
err = 0;
}
//euclidean error distance accumulator between each component of the histogram
err += abs(p_descriptor[i] - templ_descriptor[i%templ_descriptor.size()]);
}
// we get idx = 11655 and err = 5.34021
//convert vector idx in x,y coordonates in the patch
int row= static_cast<int>(idx/patch.cols);
int col = idx%patch.cols;
//show the result
Rect2f found_object(col,row,hog.winSize.width,hog.winSize.height); // [32 x 16 from (105, 77)]
rectangle(patch,found_object,Scalar(0,0,255));
imshow("result",patch);
waitKey(500000);
return 1;
}
My result
Of course the expected result is to have the bounding box on the vehicle.
My questions
1/ How is the descriptor returned by the `compute` function structured?
I assume there are 9 (nbins) floats describing each cell, but I don't understand why I get 108/9 = 12 cells in templ_descriptor when the winSize is 16x32 and the cellSize is 8x8.
2/ How can I retrieve the pixel coordinates of the window in p_descriptor that best matches templ_descriptor?
3/ Do you have any other suggestions for re-detecting my target after small occlusions?
Helpful links