I took the code for train_HOG.cpp from the OpenCV site. I went through all the steps carefully, but I am not able to get accurate detections, even on an image at the same resolution (the original image from which I created the positive images). I am attaching the output images and the code here for analysis. I put a red circle on each area that was not found by the HOG utility. I also need the object to be recognized when the source image is at a different scale (e.g. 50%, 75%, 125%, 150%).
Awaiting your response, feedback, or comments.
#include "stdafx.h"
#include "TrainHOGEx.h"
// Default constructor; the body is intentionally empty.
TrainHOGEx::TrainHOGEx() {}
// Destructor; the body is intentionally empty.
TrainHOGEx::~TrainHOGEx() {}
// Forward declarations of the free helper functions defined later in this file.
// mainTestEx: stand-alone test/training harness invoked by TrainHOGEx::TestEx.
int mainTestEx();
// Packs the trained SVM's single support vector and bias into a HOG detector vector.
void get_svm_detectorex(const Ptr< SVM > & svm, vector< float > & hog_detector);
// Stacks per-sample feature vectors into one row-per-sample CV_32FC1 matrix.
void convert_to_mlex(const std::vector< Mat > & train_samples, Mat& trainData);
// Loads every readable image matched by dirname into img_lst.
void load_images(const String & dirname, vector< Mat > & img_lst, bool showImages);
// Crops one randomly placed window of the given size out of each negative image.
void sample_negex(const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size);
// Appends one HOG descriptor per image to gradient_lst.
void computeHOGs(const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst);
// Runs a saved detector over the images matched by test_dir and displays the results.
int test_trained_detector(String obj_det_filename, String test_dir, String videofilename);
/*
 * Builds the coefficient vector expected by HOGDescriptor::setSVMDetector from a
 * trained SVM: the single support vector followed by the negated decision bias.
 *
 * svm          - trained SVM; must expose exactly one support vector (CV_32F)
 *                with a decision coefficient of 1.
 * hog_detector - output; previous content is discarded.
 */
void get_svm_detectorex(const Ptr< SVM >& svm, vector< float > & hog_detector)
{
    // Support vectors first, then the decision function (same order as before).
    Mat sv = svm->getSupportVectors();
    const int sv_total = sv.rows;

    Mat alpha, svidx;
    const double bias = svm->getDecisionFunction(0, alpha, svidx);

    // The packing below is only valid for a single support vector with weight 1.
    CV_Assert(alpha.total() == 1 && svidx.total() == 1 && sv_total == 1);
    CV_Assert((alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
        (alpha.type() == CV_32F && alpha.at<float>(0) == 1.f));
    CV_Assert(sv.type() == CV_32F);

    const int featureCount = sv.cols;
    hog_detector.clear();
    hog_detector.resize(featureCount + 1);
    memcpy(&hog_detector[0], sv.ptr(), featureCount * sizeof(hog_detector[0]));
    // Last slot carries the bias term with inverted sign.
    hog_detector[featureCount] = (float)-bias;
}
/*
* Convert training/testing set to be used by OpenCV Machine Learning algorithms.
* TrainData is a matrix of size (#samples x max(#cols,#rows) per samples), in 32FC1.
* Transposition of samples are made if needed.
*/
void convert_to_mlex(const vector< Mat > & train_samples, Mat& trainData)
{
//--Convert data
const int rows = (int)train_samples.size();
const int cols = (int)std::max(train_samples[0].cols, train_samples[0].rows);
Mat tmp(1, cols, CV_32FC1); //< used for transposition if needed
trainData = Mat(rows, cols, CV_32FC1);
for (size_t i = 0; i < train_samples.size(); ++i)
{
CV_Assert(train_samples[i].cols == 1 || train_samples[i].rows == 1);
if (train_samples[i].cols == 1)
{
transpose(train_samples[i], tmp);
tmp.copyTo(trainData.row((int)i));
}
else if (train_samples[i].rows == 1)
{
train_samples[i].copyTo(trainData.row((int)i));
}
}
}
void load_images(const String & dirname, vector< Mat > & img_lst, bool showImages = false)
{
try {
vector< String > files;
try {
glob(dirname, files);
}
catch (...)
{
AfxMessageBox("exception");
}
for (size_t i = 0; i < files.size(); ++i)
{
Mat img = imread(files[i]); // load the image
if (img.empty()) // invalid image, skip it.
{
cout << files[i] << " is invalid!" << endl;
continue;
}
if (showImages)
{
imshow("image", img);
waitKey(1);
}
img_lst.push_back(img);
}
}
catch (...)
{
AfxMessageBox("exception");
}
}
void sample_negex(const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size)
{
Rect box;
box.width = size.width;
box.height = size.height;
const int size_x = box.width;
const int size_y = box.height;
srand((unsigned int)time(NULL));
for (size_t i = 0; i < full_neg_lst.size(); i++)
{
box.x = rand() % (full_neg_lst[i].cols - size_x);
box.y = rand() % (full_neg_lst[i].rows - size_y);
Mat roi = full_neg_lst[i](box);
neg_lst.push_back(roi.clone());
}
}
void computeHOGs(const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst)
{
HOGDescriptor hog;
hog.winSize = wsize;
Rect r = Rect(0, 0, wsize.width, wsize.height);
r.x += (img_lst[0].cols - r.width) / 2;
r.y += (img_lst[0].rows - r.height) / 2;
Mat gray;
vector< float > descriptors;
for (size_t i = 0; i < img_lst.size(); i++)
{
cvtColor(img_lst[i](r), gray, COLOR_BGR2GRAY);
hog.compute(gray, descriptors, Size(8, 8), Size(0, 0));
gradient_lst.push_back(Mat(descriptors).clone());
}
}
/*
 * Loads a saved HOG detector and runs it over every image matched by test_dir,
 * showing each result in a window until a key is pressed (ESC stops early).
 *
 * obj_det_filename - detector file written by HOGDescriptor::save.
 * test_dir         - pattern/directory handed to glob().
 * videofilename    - not used by this implementation.
 *
 * Returns 0 after the run (or on ESC), 1 if the detector file cannot be loaded.
 *
 * Only the strongest detection(s) of each image are drawn. The previous filter
 * compared each weight with `> max`, which no weight can satisfy, so nothing was
 * ever drawn.
 */
int test_trained_detector(String obj_det_filename, String test_dir, String videofilename)
{
    cout << "Testing trained detector..." << endl;
    HOGDescriptor hog;
    if (!hog.load(obj_det_filename))
    {
        cout << "Unable to load detector from " << obj_det_filename << endl;
        return 1;
    }
    vector< String > files;
    glob(test_dir, files);
    const int delay = 0; // 0 = wait for a key press after every image
    obj_det_filename = "testing " + obj_det_filename;
    namedWindow(obj_det_filename, WINDOW_NORMAL);
    for (size_t i = 0; i < files.size(); i++)
    {
        Mat img = imread(files[i]);
        if (img.empty())
        {
            // Skip non-image files instead of ending the whole run.
            cout << files[i] << " is invalid!" << endl;
            continue;
        }
        vector< Rect > detections;
        vector< double > foundWeights;
        hog.detectMultiScale(img, detections, foundWeights);

        // Highest confidence in this image (kept in double, like the weights).
        double best = 0.0;
        for (size_t j = 0; j < foundWeights.size(); j++)
        {
            if (foundWeights[j] > best)
                best = foundWeights[j];
        }
        for (size_t j = 0; j < detections.size() && j < foundWeights.size(); j++)
        {
            // Lower this threshold (e.g. to 0) to draw every detection.
            if (foundWeights[j] >= best)
            {
                Scalar color = Scalar(0, foundWeights[j] * foundWeights[j] * 200, 0);
                rectangle(img, detections[j], color, img.cols / 400 + 1);
            }
        }
        imshow(obj_det_filename, img);
        if (27 == waitKey(delay)) // ESC
        {
            return 0;
        }
    }
    return 0;
}
/*
 * Trains a linear-SVM HOG detector and saves it as "<str_dir>\detector.yml".
 *
 * csDirPosImgs - directory/pattern of the positive sample images.
 * str_dir      - directory that receives detector.yml.
 * csImgPath    - reference image; its size, rounded down to a multiple of 8,
 *                becomes the detector window. If it cannot be read, the size of
 *                the first positive image (rounded the same way) is used.
 *
 * Returns 0 on success, 1 if a directory name is empty or no positive image
 * could be loaded. OpenCV assertion failures propagate as exceptions.
 *
 * Training runs twice: once on random negative crops, then again after adding
 * the false positives the first detector finds on the negative images.
 */
int TrainHOGEx::Train(CString csDirPosImgs, CString str_dir, CString csImgPath)
{
    string pos_dir = csDirPosImgs.GetBuffer();
    // NOTE(review): the negatives directory is hard-coded and ignores str_dir;
    // confirm whether it should be derived from str_dir instead.
    //string neg_dir = str_dir + "\\N";
    string neg_dir = "D:\\train1\\N";
    String obj_det_filename = str_dir + "\\detector.yml";
    const bool train_twice = true;
    const bool visualization = false;
    if (pos_dir.empty() || neg_dir.empty())
    {
        AfxMessageBox("Positive and Negative dir empty.");
        return 1; // nothing can be trained without both directories
    }
    vector< Mat > pos_lst, full_neg_lst, neg_lst, gradient_lst;
    vector< int > labels;
    clog << "Positive images are being loaded...";
    load_images(pos_dir, pos_lst, visualization);
    if (pos_lst.size() > 0)
    {
        clog << "...[done]" << endl;
    }
    else
    {
        clog << "no image in " << pos_dir << endl;
        return 1;
    }

    // Detector window: reference image size trimmed to a multiple of 8.
    Size pos_image_size = pos_lst[0].size();
    const Mat imgTarget = imread(csImgPath.GetBuffer());
    const int detector_width = imgTarget.cols / 8 * 8;
    const int detector_height = imgTarget.rows / 8 * 8;
    if (detector_width && detector_height)
    {
        pos_image_size = Size(detector_width, detector_height);
    }
    else
    {
        // Reference image missing or too small: fall back to the first positive.
        pos_image_size = pos_image_size / 8 * 8;
    }

    labels.assign(pos_lst.size(), +1);
    const unsigned int old = (unsigned int)labels.size();
    clog << "Negative images are being loaded...";
    load_images(neg_dir, full_neg_lst, false);
    sample_negex(full_neg_lst, neg_lst, pos_image_size);
    clog << "...[done]" << endl;
    labels.insert(labels.end(), neg_lst.size(), -1);
    // At least one negative sample is required.
    CV_Assert(old < labels.size());

    clog << "Histogram of Gradients are being calculated for positive images...";
    computeHOGs(pos_image_size, pos_lst, gradient_lst);
    clog << "...[done]" << endl;
    clog << "Histogram of Gradients are being calculated for negative images...";
    computeHOGs(pos_image_size, neg_lst, gradient_lst);
    clog << "...[done]" << endl;

    Mat train_data;
    convert_to_mlex(gradient_lst, train_data);
    clog << "Training SVM...";
    Ptr< SVM > svm = SVM::create();
    /* Default values to train SVM */
    svm->setCoef0(0.0);
    svm->setDegree(3);
    svm->setTermCriteria(TermCriteria(CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 1e-3));
    svm->setGamma(0);
    svm->setKernel(SVM::LINEAR);
    svm->setNu(0.5);
    svm->setP(0.1); // for EPSILON_SVR, epsilon in loss function?
    svm->setC(0.01); // From paper, soft classifier
    svm->setType(SVM::EPS_SVR); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
    svm->train(train_data, ROW_SAMPLE, Mat(labels));
    clog << "...[done]" << endl;

    if (train_twice)
    {
        clog << "Testing trained detector on negative images. This may take a few minutes...";
        HOGDescriptor my_hog;
        my_hog.winSize = pos_image_size;
        // Set the trained svm to my_hog
        vector< float > hog_detector;
        get_svm_detectorex(svm, hog_detector);
        my_hog.setSVMDetector(hog_detector);
        vector< Rect > detections;
        vector< double > foundWeights;
        for (size_t i = 0; i < full_neg_lst.size(); i++)
        {
            my_hog.detectMultiScale(full_neg_lst[i], detections, foundWeights);
            const Rect imageBounds(0, 0, full_neg_lst[i].cols, full_neg_lst[i].rows);
            for (size_t j = 0; j < detections.size(); j++)
            {
                // Clip to the image so the ROI below is always valid.
                const Rect clipped = detections[j] & imageBounds;
                if (clipped.area() <= 0)
                {
                    continue;
                }
                // Every detection on a negative image is a false positive.
                Mat detection = full_neg_lst[i](clipped).clone();
                resize(detection, detection, pos_image_size);
                neg_lst.push_back(detection);
            }
            if (visualization)
            {
                for (size_t j = 0; j < detections.size(); j++)
                {
                    rectangle(full_neg_lst[i], detections[j], Scalar(0, 255, 0), 2);
                }
                imshow("testing trained detector on negative images", full_neg_lst[i]);
                waitKey(5);
            }
        }
        clog << "...[done]" << endl;

        // Rebuild labels and features with the enlarged negative set.
        labels.clear();
        labels.assign(pos_lst.size(), +1);
        labels.insert(labels.end(), neg_lst.size(), -1);
        gradient_lst.clear();
        clog << "Histogram of Gradients are being calculated for positive images...";
        computeHOGs(pos_image_size, pos_lst, gradient_lst);
        clog << "...[done]" << endl;
        clog << "Histogram of Gradients are being calculated for negative images...";
        computeHOGs(pos_image_size, neg_lst, gradient_lst);
        clog << "...[done]" << endl;
        clog << "Training SVM again...";
        convert_to_mlex(gradient_lst, train_data);
        svm->train(train_data, ROW_SAMPLE, Mat(labels));
        clog << "...[done]" << endl;
    }

    vector< float > hog_detector;
    get_svm_detectorex(svm, hog_detector);
    HOGDescriptor hog;
    hog.winSize = pos_image_size;
    hog.setSVMDetector(hog_detector);
    hog.save(obj_det_filename);
    return 0;
}
/*
 * Loads the image at cs_img, copies its top-left quarter (half the width, half
 * the height) into a new image and shows it in the "image_Originale2" window
 * until a key is pressed.
 *
 * Fixes relative to the earlier version: a failed load is reported instead of
 * dereferencing a null image; cvSize receives (width, height) in that order; the
 * destination is addressed with its own row stride; every channel is copied; and
 * the temporary image is released.
 *
 * NOTE(review): the copy assumes 8-bit samples, matching the IPL_DEPTH_8U
 * destination - confirm that the load flag (3) never yields deeper images here.
 */
void TrainHOGEx::ShowImage(CString cs_img)
{
    IplImage* img = cvLoadImage(cs_img.GetBuffer(), 3);
    if (img == 0)
    {
        AfxMessageBox("Unable to load image.");
        return;
    }

    const int halfWidth = img->width / 2;
    const int halfHeight = img->height / 2;
    const int nChannels = img->nChannels;
    if (halfWidth <= 0 || halfHeight <= 0)
    {
        // Too small to take a quarter from.
        cvReleaseImage(&img);
        return;
    }

    IplImage* img1 = cvCreateImage(cvSize(halfWidth, halfHeight), IPL_DEPTH_8U, nChannels);
    const uchar* src = (const uchar*)img->imageData;
    uchar* dst = (uchar*)img1->imageData;
    // Source and destination rows have different strides; index each with its own.
    const int srcStep = img->widthStep;
    const int dstStep = img1->widthStep;
    for (int i = 0; i < halfHeight; i++)
    {
        for (int j = 0; j < halfWidth; j++)
        {
            for (int k = 0; k < nChannels; k++)
            {
                dst[i * dstStep + j * nChannels + k] = src[i * srcStep + j * nChannels + k];
            }
        }
    }

    cvShowImage("image_Originale2", img1);
    cvWaitKey(0);
    cvReleaseImage(&img1);
    cvReleaseImage(&img);
    return;
}
/*
 * Rounds width and height up, in place, to the next multiple of 8 by stepping
 * one unit at a time (at most 10 steps per value). Values that are already a
 * multiple of 8 are left untouched. Always returns 0.
 */
int GetDimension(int& width, int& height)
{
    int* const dims[2] = { &width, &height };
    for (int d = 0; d < 2; ++d)
    {
        int& value = *dims[d];
        for (int step = 0; step < 10 && value % 8 != 0; ++step)
        {
            ++value;
        }
    }
    return 0;
}
/*
 * Generates distorted positive training samples from a single image.
 *
 * cs_imgPath  - path of the source image.
 * csDirToSave - forwarded as the last argument of cvCreateTrainingSamples.
 *
 * The sample width/height are the image dimensions rounded up to the next
 * multiple of 8 (via GetDimension). Returns 0 when a generation mode ran,
 * 1 when the source image cannot be read.
 *
 * Which helper runs is decided by which of the file names below are non-null;
 * with the values set here the first branch (cvCreateTrainingSamples) is taken.
 */
int TrainHOGEx::CreatePositiveImage(CString cs_imgPath, CString csDirToSave)
{
    bool nagative = false;
    // Writable storage so the pointer below is a valid char* (not a string literal).
    static char vecPath[] = "D:\\1\\appservice.vec";
    char* vecname = vecPath; /* .vec file name */
    char* infoname = NULL; //"D:\\1\\appservice.info"; /* file name with marked up image descriptions */
    char* imagename = cs_imgPath.GetBuffer(); // "D:\\1\\AppService.png"; /* single sample image */
    char* bgfilename = NULL; //"D:\\1\\bg.txt"; /* background */
    int num = 40;
    int bgcolor = 0;
    int bgthreshold = 0; //80
    int invert = 0;
    int maxintensitydev = 40;
    double maxxangle = 0;
    double maxyangle = 0;
    double maxzangle = 0;
    int showsamples = 0;
    /* the samples are adjusted to this scale in the sample preview window */
    double scale = 40.0;
    double maxscale = -1;

    Mat img = imread(imagename);
    if (img.empty())
    {
        // Without the image there is no size to derive the sample window from.
        printf("Unable to read image %s\n", imagename);
        return 1;
    }

    // Sample size: image size rounded up to a multiple of 8.
    int width = img.cols;
    int height = img.rows;
    GetDimension(width, height);

    if (nagative)
    {
        maxxangle = 20;
        maxyangle = 50;
        maxzangle = 10;
        bgthreshold = 5;
        num = 50;
        invert = true;
        maxintensitydev = 0;
        bgcolor = 50;
    }
    srand((unsigned int)time(0));

    /* determine action */
    if (imagename && vecname)
    {
        printf("Create training samples from single image applying distortions...\n");
        cvCreateTrainingSamples(vecname, imagename, bgcolor, bgthreshold, bgfilename,
            num, invert, maxintensitydev,
            maxxangle, maxyangle, maxzangle,
            showsamples, width, height, csDirToSave);
        // Earlier experiments repeated this call with width/height scaled to
        // 75% and 125% of the rounded size to add samples at other scales.
        printf("Done\n");
    }
    else if (imagename && bgfilename && infoname)
    {
        printf("Create test samples from single image applying distortions...\n");
        cvCreateTestSamples(infoname, imagename, bgcolor, bgthreshold, bgfilename, num,
            invert, maxintensitydev,
            maxxangle, maxyangle, maxzangle, showsamples, width, height, maxscale);
        printf("Done\n");
    }
    else if (infoname && vecname)
    {
        int total;
        printf("Create training samples from images collection...\n");
        total = cvCreateTrainingSamplesFromInfo(infoname, vecname, num, showsamples,
            width, height);
        printf("Done. Created %d samples\n", total);
    }
    else if (vecname)
    {
        printf("View samples from vec file (press ESC to exit)...\n");
        cvShowVecSamples(vecname, width, height, scale);
        printf("Done\n");
    }
    else
    {
        printf("Nothing to do\n");
    }
    return 0;
}
/*
 * Runs the detector stored as "detector.yml" inside csDetectorPath over the
 * images matched by test_dir. No video file name is supplied.
 */
void TrainHOGEx::Search(CString csDetectorPath, CString test_dir)
{
    CString detectorFile = csDetectorPath + "\\detector.yml";
    test_trained_detector(detectorFile.GetBuffer(), test_dir.GetBuffer(), "");
}
// Entry point for the stand-alone harness; its status code is not used.
VOID TrainHOGEx::TestEx()
{
    (void)mainTestEx();
}
/*
 * Stand-alone harness driven entirely by hard-coded paths and flags.
 *
 * As written, test_detector is false and testPoc is true, so the function runs
 * test_trained_detector on the "D:\1\HOG" paths and returns 0. Everything after
 * the testPoc block (image loading, SVM training, hard-negative retraining,
 * saving and testing the detector) is unreachable until testPoc is set to false.
 *
 * Returns 0 after testing/training, or 1 when no positive image is found on the
 * training path.
 */
int mainTestEx()
{
/*
// Read image
Mat im = imread("D:\\1\\sa.png");
// Select ROI
Rect2d r = selectROI(im);
// Crop image
Mat imCrop = im(r);
// Display Cropped Image
imshow("Image", imCrop);
waitKey(0);
return 0;
*/
// Command-line key description; not referenced below because the parser calls
// are commented out and replaced by hard-coded values.
const char* keys =
{
"{help h| | show help message}"
"{pd | | path of directory contains possitive images}"
"{nd | | path of directory contains negative images}"
"{td | | path of directory contains test images}"
"{tv | | test video file name}"
"{dw | | width of the detector}"
"{dh | | height of the detector}"
"{d |false| train twice}"
"{t |false| test a trained detector}"
"{v |false| visualize training steps}"
"{fn |my_detector.yml| file name of trained SVM}"
};
string pos_dir = "D:\\1\\P";
string neg_dir = "D:\\1\\N";
//String pos_dir = parser.get< String >("pd");
//String neg_dir = parser.get< String >("nd");
String test_dir = "D:\\1\\P";// parser.get< String >("td");
String obj_det_filename = "D:\\1\\de.ylm";// parser.get< String >("fn");
int detector_width = 80;// 83;
int detector_height = 16;
bool test_detector = false; //parser.get< bool >("t");
bool train_twice = true;// parser.get< bool >("d");
bool visualization = true;// parser.get< bool >("v");
string videofilename = "D:\\1\\sa.png";
// Test-only mode: run an existing detector and stop.
if (test_detector)
{
test_trained_detector(obj_det_filename, test_dir, videofilename);
return 0;
}
// Proof-of-concept mode: test a previously built detector from the HOG folder.
// While testPoc is true this block always returns, so the training code below
// never runs.
bool testPoc = true;
if (testPoc)
{
detector_width = 96;// 83;
detector_height = 40;
pos_dir = "D:\\1\\HOG\\text-pos";
neg_dir = "D:\\1\\HOG\\eggs-neg";
obj_det_filename = "D:\\1\\HOG\\build\\Release\\my_text_detector.yml";
test_dir = "D:\\1\\HOG\\text-test";
test_trained_detector(obj_det_filename, test_dir, videofilename);
return 0;
}
// Empty directory names are detected here but not acted upon.
if (pos_dir.empty() || neg_dir.empty())
{
}
vector< Mat > pos_lst, full_neg_lst, neg_lst, gradient_lst;
vector< int > labels;
clog << "Positive images are being loaded...";
load_images(pos_dir, pos_lst, visualization);
if (pos_lst.size() > 0)
{
clog << "...[done]" << endl;
}
else
{
clog << "no image in " << pos_dir << endl;
return 1;
}
// All positives must share one size; the process exits otherwise.
Size pos_image_size = pos_lst[0].size();
for (size_t i = 0; i < pos_lst.size(); ++i)
{
if (pos_lst[i].size() != pos_image_size)
{
cout << "All positive images should be same size!" << endl;
exit(1);
}
}
// Trim to a multiple of 8, then let an explicit detector size override it.
pos_image_size = pos_image_size / 8 * 8;
if (detector_width && detector_height)
{
pos_image_size = Size(detector_width, detector_height);
}
// Labels: +1 for every positive, -1 for every sampled negative.
labels.assign(pos_lst.size(), +1);
const unsigned int old = (unsigned int)labels.size();
clog << "Negative images are being loaded...";
load_images(neg_dir, full_neg_lst, false);
sample_negex(full_neg_lst, neg_lst, pos_image_size);
clog << "...[done]" << endl;
labels.insert(labels.end(), neg_lst.size(), -1);
// Requires at least one negative sample.
CV_Assert(old < labels.size());
clog << "Histogram of Gradients are being calculated for positive images...";
computeHOGs(pos_image_size, pos_lst, gradient_lst);
clog << "...[done]" << endl;
clog << "Histogram of Gradients are being calculated for negative images...";
computeHOGs(pos_image_size, neg_lst, gradient_lst);
clog << "...[done]" << endl;
Mat train_data;
convert_to_mlex(gradient_lst, train_data);
clog << "Training SVM...";
Ptr< SVM > svm = SVM::create();
/* Default values to train SVM */
svm->setCoef0(0.0);
svm->setDegree(3);
svm->setTermCriteria(TermCriteria(CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 1e-3));
svm->setGamma(0);
svm->setKernel(SVM::LINEAR);
svm->setNu(0.5);
svm->setP(0.1); // for EPSILON_SVR, epsilon in loss function?
svm->setC(0.01); // From paper, soft classifier
svm->setType(SVM::EPS_SVR); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
svm->train(train_data, ROW_SAMPLE, Mat(labels));
clog << "...[done]" << endl;
// Second pass: add every detection found on the negative images as an extra
// negative sample, then retrain on the enlarged set.
if (train_twice)
{
clog << "Testing trained detector on negative images. This may take a few minutes...";
HOGDescriptor my_hog;
my_hog.winSize = pos_image_size;
// Set the trained svm to my_hog
vector< float > hog_detector;
get_svm_detectorex(svm, hog_detector);
my_hog.setSVMDetector(hog_detector);
vector< Rect > detections;
vector< double > foundWeights;
for (size_t i = 0; i < full_neg_lst.size(); i++)
{
my_hog.detectMultiScale(full_neg_lst[i], detections, foundWeights);
for (size_t j = 0; j < detections.size(); j++)
{
// NOTE(review): the detection rectangle is used as an ROI without being
// clipped to the image bounds - confirm it can never extend past them.
Mat detection = full_neg_lst[i](detections[j]).clone();
resize(detection, detection, pos_image_size);
neg_lst.push_back(detection);
}
if (visualization)
{
for (size_t j = 0; j < detections.size(); j++)
{
rectangle(full_neg_lst[i], detections[j], Scalar(0, 255, 0), 2);
}
imshow("testing trained detector on negative images", full_neg_lst[i]);
waitKey(5);
}
}
clog << "...[done]" << endl;
// Rebuild labels and descriptors so they match the enlarged negative list.
labels.clear();
labels.assign(pos_lst.size(), +1);
labels.insert(labels.end(), neg_lst.size(), -1);
gradient_lst.clear();
clog << "Histogram of Gradients are being calculated for positive images...";
computeHOGs(pos_image_size, pos_lst, gradient_lst);
clog << "...[done]" << endl;
clog << "Histogram of Gradients are being calculated for negative images...";
computeHOGs(pos_image_size, neg_lst, gradient_lst);
clog << "...[done]" << endl;
clog << "Training SVM again...";
convert_to_mlex(gradient_lst, train_data);
svm->train(train_data, ROW_SAMPLE, Mat(labels));
clog << "...[done]" << endl;
}
// Export the final SVM as a HOG detector file, then run it on the test images.
vector< float > hog_detector;
get_svm_detectorex(svm, hog_detector);
HOGDescriptor hog;
hog.winSize = pos_image_size;
hog.setSVMDetector(hog_detector);
hog.save(obj_det_filename);
test_trained_detector(obj_det_filename, test_dir, videofilename);
return 0;
}