The objective is to train an SVM to classify handwritten digits (10 classes), so the SVM is a multiclass classifier.
As a result of the training, I got a yml file which indicates that the SVM has a total of 1009 support vectors and a class_count of 10 (because there are 10 digits in the training dataset). It all works fine. But now I would like to use cv::HOGDescriptor::detectMultiScale() in order to detect the digits in any other image or video. My problem arises when I try to use the HOGDescriptor::setSVMDetector function, since the SVM model I have just created is not compatible with it (I tried and it returned errors). So I searched the Internet for a way to process the SVM support vectors so that they can be loaded through the setSVMDetector function.
I have found loads of forums pointing to this function:
void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector ){
// get the support vectors
Mat sv = svm->getSupportVectors();
const int sv_total = sv.rows;
// get the decision function
Mat alpha, svidx;
double rho = svm->getDecisionFunction(0, alpha, svidx);
CV_Assert( alpha.total() == 1 && svidx.total() == 1 && sv_total == 1 );
CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
(alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) );
CV_Assert( sv.type() == CV_32F );
hog_detector.clear();
hog_detector.resize(sv.cols + 1);
memcpy(&hog_detector[0], sv.ptr(), sv.cols*sizeof(hog_detector[0]));
hog_detector[sv.cols] = (float)-rho;
}
But I have analyzed it, and the assertion in this code will throw an exception at runtime, since sv_total is not equal to 1 for my SVM model (it is equal to 1009). So I have to get rid of the assertions, but now the problem is which index I should use in getDecisionFunction(0, alpha, svidx), since according to the documentation 0 is only for one- or two-class models, but mine is a 10-class model. I tried using 45, since there will be 45 different equations to solve (one per pair of classes), but it raises an error. I tried 1, 2, and 10, but it never finds the correct digits in my test image.
The code I implemented is the following (I try to use my variable names in Spanish, I love my language jejeje):
cv::Mat sv = svm->getSupportVectors();
const int sv_total = sv.rows;
cv::Mat alpha, svidx;
double rho = svm->getDecisionFunction(10, alpha, svidx);
vector<float> svmDetector;
svmDetector.clear();
svmDetector.resize(sv.cols + 1);
memcpy(&svmDetector[0], sv.ptr(), sv.cols * sizeof(svmDetector[0]));
svmDetector[sv.cols] = (float)-rho;
I then use the result, svmDetector, together with the rest of the code like this:
// Install the coefficient vector and scan the test image; the matches are
// written to encontrados and their count is printed.
hog.setSVMDetector(svmDetector);
hog.detectMultiScale(imagenPrueba, encontrados, 2);
cout << encontrados.size() << endl;

// Stack the test image three times into one multi-channel image so the
// rectangles can be drawn in color (assumes imagenPrueba is single-channel;
// confirm where it is loaded).
cv::Mat resultadoImagen;
const vector<cv::Mat> grises(3, imagenPrueba);
cv::merge(grises, resultadoImagen);

// Outline every match; Scalar(0, 0, 255) is red in OpenCV's BGR order.
for (const auto& deteccion : encontrados)
    cv::rectangle(resultadoImagen, deteccion, cv::Scalar(0, 0, 255));

// Show the annotated image and wait for a key press.
cv::imshow("RESULTADO", resultadoImagen);
cv::waitKey(0);
I have read a lot about it, but I still cannot figure out how to do this properly. I have actually found some examples, but they are for 2-class SVMs, and extrapolating the idea from 2 classes to N classes is not straightforward, at least to me. I would love to upload my code and images, but I have just joined the forum and still can't do it.
I would really appreciate it if someone could give me a hand with this and suggest ideas that put me on the right path.
Thanks in advance.
My full code is below:
#include <cmath>
#include <iostream>
#include <vector>
#include "opencv2\imgproc.hpp"
#include "opencv2\highgui.hpp"
#include "opencv2\objdetect.hpp"
#include "opencv2\ml.hpp"
using namespace std;
int main() {
cv::Mat imgOriginal = cv::imread("D://digits.png", cv::IMREAD_GRAYSCALE);
//Cargando letras
vector<cv::Mat> coleccionLetras;
vector<cv::Mat> coleccionLetrasCorregidas;
for (int i = 0; i < imgOriginal.rows; i = i + 20)
for (int j = 0; j < imgOriginal.cols; j=j+20) {
cv::Mat letraActual = imgOriginal(cv::Rect(j,i,20,20)).clone();
coleccionLetras.push_back(letraActual);
}
//Corrigiendo inclinacion
for (int i = 0; i < coleccionLetras.size(); i++) {
cv::Moments m = cv::moments(coleccionLetras[i]);
if (abs(m.mu02) < 1e-2)
coleccionLetrasCorregidas.push_back(coleccionLetras[i]);
else {
cv::Mat warpMat = (cv::Mat_<float>(2, 3) << 1, (float)(m.mu11 / m.mu02), -0.5 * 20 * (float)(m.mu11 / m.mu02), 0, 1, 0);
cv::Mat imgOut = cv::Mat::zeros(coleccionLetras[i].rows, coleccionLetras[i].cols, coleccionLetras[i].type());
cv::warpAffine(coleccionLetras[i], imgOut, warpMat, imgOut.size(), cv::WARP_INVERSE_MAP | cv::INTER_LINEAR);
coleccionLetrasCorregidas.push_back(imgOut);
}
}
//Cargando informacion para entrenamiento
//Se tiene un total de 10 digitos. Cada digito tiene un total de 500 muestras. Tomaremos las 400 primeras muestras de cada digito para entrenar.
vector<cv::Mat> trainDataSetMat;
vector<cv::Mat> testDataSetMat;
vector<int> trainLabels;
vector<int> testLabels;
for (int digito = 0; digito < 10; digito++)
for (int muestra = 0; muestra < 400; muestra++) {
trainDataSetMat.push_back(coleccionLetrasCorregidas[digito*500+muestra]);
trainLabels.push_back(digito);
}
for (int digito = 0; digito < 10; digito++)
for (int muestra = 0; muestra < 100; muestra++) {
testDataSetMat.push_back(coleccionLetrasCorregidas[digito * 500 + 400 + muestra]);
testLabels.push_back(digito);
}
//Obteniendo HOG features para cada dataset
vector<vector<float>> trainDataSetHOG;
vector<vector<float>> testDataSetHOG;
cv::HOGDescriptor hog(
cv::Size(20, 20), //winSize
cv::Size(10, 10), //blockSize
cv::Size(5, 5), //blockStride
cv::Size(10, 10), //cellSize
9,
1,
-1.0,
0,
0.2,
0,
64,
true
);
for (int i = 0; i<trainDataSetMat.size(); i++) {
vector<float> HOGfeatures;
hog.compute(trainDataSetMat[i], HOGfeatures);
trainDataSetHOG.push_back(HOGfeatures);
}
for (int i = 0; i<testDataSetMat.size(); i++) {
vector<float> HOGfeatures;
hog.compute(testDataSetMat[i], HOGfeatures);
testDataSetHOG.push_back(HOGfeatures);
}
//Convirtiendo a matriz, para entrenamiento
cv::Mat trainMat((int)trainDataSetHOG.size(), (int)trainDataSetHOG[0].size(), CV_32FC1);
cv::Mat testMat((int)testDataSetHOG.size(), (int)testDataSetHOG[0].size(), CV_32FC1);
for (int i = 0; i < trainMat.rows; i++)
for (int j = 0; j < trainMat.cols; j++)
trainMat.at<float>(i,j) = trainDataSetHOG[i][j];
for (int i = 0; i < testMat.rows; i++)
for (int j = 0; j < testMat.cols; j++)
testMat.at<float>(i, j) = testDataSetHOG[i][j];
cv::Mat respuesta;
cv::Ptr<cv::ml::SVM> svm = cv::ml::SVM::create();
svm->setGamma(0.50625);
svm->setC(12.5);
svm->setKernel(cv::ml::SVM::RBF);
svm->setType(cv::ml::SVM::C_SVC);
cv::Ptr<cv::ml::TrainData> td = cv::ml::TrainData::create(trainMat, cv::ml::ROW_SAMPLE, trainLabels);
svm->train(td);
svm->save("modelo.yml");
svm->predict(testMat, respuesta);
float conteo = 0.0f;
float precision = 0.0f;
for (int i = 0; i < respuesta.rows; i++) {
if (respuesta.at<float>(i, 0) == testLabels[i])
conteo++;
}
precision = (conteo / respuesta.rows) * 100.0f;
cout << "Precision " << precision << endl;
cv::Mat imagenPrueba = cv::imread("D://TEST_FINAL.jpg", cv::IMREAD_GRAYSCALE);
vector<cv::Rect> encontrados;
cv::Mat sv = svm->getSupportVectors();
const int sv_total = sv.rows;
cv::Mat alpha, svidx;
double rho = svm->getDecisionFunction(10, alpha, svidx);
vector<float> svmDetector;
svmDetector.clear();
svmDetector.resize(sv.cols + 1);
memcpy(&svmDetector[0], sv.ptr(), sv.cols * sizeof(svmDetector[0]));
svmDetector[sv.cols] = (float)-rho;
hog.setSVMDetector(svmDetector);
hog.detectMultiScale(imagenPrueba, encontrados, 2);
cout << encontrados.size() << endl;
cv::Mat resultadoImagen;
vector<cv::Mat> grises;
grises.push_back(imagenPrueba);
grises.push_back(imagenPrueba);
grises.push_back(imagenPrueba);
cv::merge(grises, resultadoImagen);
for(int i=0;i<encontrados.size();i++)
cv::rectangle(resultadoImagen, encontrados[i], cv::Scalar(0, 0, 255));
cv::imshow("RESULTADO", resultadoImagen);
cv::waitKey(0);
return 0;
}