Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

How to detect an object in an image using HOG Descriptors?

When tracking an object, I want to be able to re-detect it after an occlusion.

On OpenCV 3.4.5 (C++), I tried template matching and optical flow segmentation. Now I would like to implement a more robust algorithm using a HOG descriptor.

I made a little example to show the problem. Here are my 2 images :

image description here the vehicle I want to detect

image description the image in which I'm searching

PS: I don't want to train an SVM, since I only want to detect a unique object in a few frames.

My code :

#include <opencv2/core/utility.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/objdetect.hpp>

#include <iostream>
#include <vector>

using namespace std;
using namespace cv;

// Minimal reproduction of the problem described in the question.
// Steps: load a template image and a larger search patch, convert both to gray,
// compute a HOG descriptor for each, scan the patch descriptor in
// template-descriptor-sized chunks for the chunk with the smallest summed
// absolute difference, then draw a rectangle on the patch and display it.
// argc/argv are not used. Always returns 1.
int main(int argc, char** argv){

    //load images
     Mat lastSeenObject=imread("lastSeenObject.png",1); //21x39
     Mat patch=imread("patch.png",1); //150x150
    // NOTE(review): neither Mat is checked for empty() before being used below.

    //params
    Size cellSize(8,8);
    int nbins= 9;
    Size blockSize(2,2); // expressed in cells; multiplied by cellSize below to get pixels

    //my variables
    vector<float>templ_descriptor;
    vector<float>p_descriptor;
    Mat templ_gray,p_gray,iMatches; // iMatches is never used in this function
    vector<DMatch> matches;         // matches is never used in this function

    //convert to gray
    cvtColor(lastSeenObject,templ_gray,CV_BGR2GRAY);
    cvtColor(patch,p_gray,CV_BGR2GRAY);

    //create hog object
    // winSize is the template size rounded down to a multiple of the cell size
    // (integer division, then multiplication).
    // The 2nd and 3rd arguments pass height before width; this is harmless here only
    // because cellSize and blockSize are square.
    HOGDescriptor hog(Size(templ_gray.cols/cellSize.width*cellSize.width,templ_gray.rows/cellSize.height*cellSize.height),
            Size(blockSize.height*cellSize.height,blockSize.width*cellSize.width),
            Size(cellSize.height,cellSize.width),
            cellSize,
            nbins);
    // gives --> winSize [32 x 16],  blockSize [16 x 16],  blockStride [8 x 8],  cellSize [8 x 8]

    //compute the descriptor of the car
    // 3rd argument is an 8x8 Size, 4th is Size(0,0); presumably these are winStride and
    // padding -- confirm against the HOGDescriptor::compute documentation.
    hog.compute(templ_gray,templ_descriptor, Size(cellSize.height,cellSize.width), Size( 0, 0 ));
    //templ_descriptor.size() = 108, containing floats between 0 and 1

    //compute the descriptor of the patch
    hog.compute(p_gray,p_descriptor, Size(cellSize.height,cellSize.width), Size( 0, 0 ));
    //p_descriptor.size() = 27540, containing floats between 0 and 1
    // 27540 / 108 = 255, i.e. p_descriptor holds 255 chunks of templ_descriptor.size() floats.

    //compare the descriptors
    double err=0;       // running error of the chunk currently being scanned
    double min_err = -1; // -1 is the "no minimum recorded yet" sentinel
    int idx=-1;
    for (unsigned int i =0;i<p_descriptor.size();i++)
    {
        // This branch fires on the FIRST element of each chunk after the first one, and
        // evaluates the error accumulated for the PREVIOUS chunk.
        // NOTE(review): the loop ends without running this check for the final chunk, so
        // the last chunk's error is accumulated but never compared with min_err.
        if(i%templ_descriptor.size()==0 && i!=0) // iterate the computation of error over the templ_descriptor size
        {
            if(err<min_err || min_err ==-1)
            {
                min_err = err;
                // NOTE(review): i is the start of the next chunk, so i-nbins points 9
                // floats before the end of the chunk just evaluated, not at its start
                // (which would be i-templ_descriptor.size()).
                idx = i-nbins;
            }
            err = 0;
        }
        // accumulates the absolute difference of each component, i.e. a sum of absolute
        // differences (L1), not a Euclidean distance
        err += abs(p_descriptor[i] - templ_descriptor[i%templ_descriptor.size()]);
    }

    // we get idx = 11655 and err = 5.34021

    //convert vector idx in x,y coordinates in the patch
    // NOTE(review): idx is an index into the float vector p_descriptor, yet it is split
    // here by patch.cols as if it were a linear pixel offset (11655 -> row 77, col 105).
    // Presumably each chunk corresponds to one window position, so the window number
    // would be idx / templ_descriptor.size() -- confirm against the compute() docs.
    int row= static_cast<int>(idx/patch.cols);
    int col = idx%patch.cols;

    //show the result
    Rect2f found_object(col,row,hog.winSize.width,hog.winSize.height); // [32 x 16 from (105, 77)]
    rectangle(patch,found_object,Scalar(0,0,255));
    imshow("result",patch);
    waitKey(500000);

    // NOTE(review): a non-zero return from main conventionally signals failure to the caller.
    return 1;

}

My result

image description

Of course the expected result is to have the bounding box on the vehicle.

My questions

1/ How is the descriptor returned by the function compute structured?

I assume there are 9 (nbins) floats describing each cell, but I don't understand why I get 108/9 = 12 cells in templ_descriptor when the winSize is 16x32 and the cellSize is 8x8.

2/ How can I retrieve the pixel coordinates of the window in p_descriptor that best matches templ_descriptor?

3/ Do you have any other suggestions to solve my issue of redetecting my target after small occlusions ?

Helpful links

OpenCV 3.4.5 documentation on HOG Descriptor

LearnOpenCV article on HOG

How to detect an object in an image using HOG Descriptors?

When tracking an object, I want to be able to re-detect it after an occlusion.

On OpenCV 3.4.5 (C++), I tried template matching and optical flow segmentation. Now I would like to implement a more robust algorithm using a HOG descriptor.

I made a little example to show the problem. Here are my 2 images :

image description here the vehicle I want to detect

image description the image in which I'm searching

PS: I don't want to train an SVM, since I only want to detect a unique object in a few frames.

My code :

#include <opencv2/core/utility.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/objdetect.hpp>

#include <iostream>
#include <vector>

using namespace std;
using namespace cv;

// Minimal reproduction of the problem described in the question.
// Steps: load a template image and a larger search patch, convert both to gray,
// compute a HOG descriptor for each, scan the patch descriptor in
// template-descriptor-sized chunks for the chunk with the smallest summed
// absolute difference, then draw a rectangle on the patch and display it.
// argc/argv are not used. Always returns 1.
int main(int argc, char** argv){

    //load images
     Mat lastSeenObject=imread("lastSeenObject.png",1); //21x39
     Mat patch=imread("patch.png",1); //150x150
    // NOTE(review): neither Mat is checked for empty() before being used below.

    //params
    Size cellSize(8,8);
    int nbins= 9;
    Size blockSize(2,2); // expressed in cells; multiplied by cellSize below to get pixels

    //my variables
    vector<float>templ_descriptor;
    vector<float>p_descriptor;
    Mat templ_gray,p_gray,iMatches; // iMatches is never used in this function
    vector<DMatch> matches;         // matches is never used in this function

    //convert to gray
    cvtColor(lastSeenObject,templ_gray,CV_BGR2GRAY);
    cvtColor(patch,p_gray,CV_BGR2GRAY);

    //create hog object
    // winSize is the template size rounded down to a multiple of the cell size
    // (integer division, then multiplication).
    // The 2nd and 3rd arguments pass height before width; this is harmless here only
    // because cellSize and blockSize are square.
    HOGDescriptor hog(Size(templ_gray.cols/cellSize.width*cellSize.width,templ_gray.rows/cellSize.height*cellSize.height),
            Size(blockSize.height*cellSize.height,blockSize.width*cellSize.width),
            Size(cellSize.height,cellSize.width),
            cellSize,
            nbins);
    // gives --> winSize [32 x 16],  blockSize [16 x 16],  blockStride [8 x 8],  cellSize [8 x 8]

    //compute the descriptor of the car
    // 3rd argument is an 8x8 Size, 4th is Size(0,0); presumably these are winStride and
    // padding -- confirm against the HOGDescriptor::compute documentation.
    hog.compute(templ_gray,templ_descriptor, Size(cellSize.height,cellSize.width), Size( 0, 0 ));
    //templ_descriptor.size() = 108, containing floats between 0 and 1

    //compute the descriptor of the patch
    hog.compute(p_gray,p_descriptor, Size(cellSize.height,cellSize.width), Size( 0, 0 ));
    //p_descriptor.size() = 27540, containing floats between 0 and 1
    // 27540 / 108 = 255, i.e. p_descriptor holds 255 chunks of templ_descriptor.size() floats.

    //compare the descriptors
    double err=0;       // running error of the chunk currently being scanned
    double min_err = -1; // -1 is the "no minimum recorded yet" sentinel
    int idx=-1;
    for (unsigned int i =0;i<p_descriptor.size();i++)
    {
        // This branch fires on the FIRST element of each chunk after the first one, and
        // evaluates the error accumulated for the PREVIOUS chunk.
        // NOTE(review): the loop ends without running this check for the final chunk, so
        // the last chunk's error is accumulated but never compared with min_err.
        if(i%templ_descriptor.size()==0 && i!=0) // iterate the computation of error over the templ_descriptor size
        {
            if(err<min_err || min_err ==-1)
            {
                min_err = err;
                // NOTE(review): i is the start of the next chunk, so i-nbins points 9
                // floats before the end of the chunk just evaluated, not at its start
                // (which would be i-templ_descriptor.size()).
                idx = i-nbins;
            }
            err = 0;
        }
        // accumulates the absolute difference of each component, i.e. a sum of absolute
        // differences (L1), not a Euclidean distance
        err += abs(p_descriptor[i] - templ_descriptor[i%templ_descriptor.size()]);
    }

    // we get idx = 11655 and err = 5.34021

    //convert vector idx in x,y coordinates in the patch
    // NOTE(review): idx is an index into the float vector p_descriptor, yet it is split
    // here by patch.cols as if it were a linear pixel offset (11655 -> row 77, col 105).
    // Presumably each chunk corresponds to one window position, so the window number
    // would be idx / templ_descriptor.size() -- confirm against the compute() docs.
    int row= static_cast<int>(idx/patch.cols);
    int col = idx%patch.cols;

    //show the result
    Rect2f found_object(col,row,hog.winSize.width,hog.winSize.height); // [32 x 16 from (105, 77)]
    rectangle(patch,found_object,Scalar(0,0,255));
    imshow("result",patch);
    waitKey(500000);

    // NOTE(review): a non-zero return from main conventionally signals failure to the caller.
     return 1;
}

}

My result

image description

Of course the expected result is to have the bounding box on the vehicle.

My questions

1/ How is the descriptor returned by the function compute structured?

I assume there are 9 (nbins) floats describing each cell, but I don't understand why I get 108/9 = 12 cells in templ_descriptor when the winSize is 16x32 and the cellSize is 8x8.

2/ How can I retrieve the pixel coordinates of the window in p_descriptor that best matches templ_descriptor?

3/ Do you have any other suggestions to solve my issue of redetecting my target after small occlusions ?

Helpful links

OpenCV 3.4.5 documentation on HOG Descriptor

LearnOpenCV article on HOG