Revision history [back]

How to choose index in getDecisionFunction for multiclass SVM

The objective is to train an SVM to classify handwritten digits (up to 10 classes), so the SVM is a multiclass classifier.

As a result of the training, I have got a yml file which indicates the SVM has a total of 1009 support vectors and a class_count of 10 (because there are 10 digits in the training dataset). It all works fine. But now I would like to use cv::HOGDescriptor::detectMultiScale() in order to detect the digits in any other image or video. My problem arises when I try to use the HOGDescriptor::setSVMDetector function, since the SVM model I have just created is not compatible (I tried and it returned errors). So I searched the Internet for a way to process the SVM support vectors so that they can be loaded through setSVMDetector.

I have found loads of forums pointing to this function:

void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector ){
// get the support vectors
Mat sv = svm->getSupportVectors();
const int sv_total = sv.rows;
// get the decision function
Mat alpha, svidx;
double rho = svm->getDecisionFunction(0, alpha, svidx);

CV_Assert( alpha.total() == 1 && svidx.total() == 1 && sv_total == 1 );
CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
           (alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) );
CV_Assert( sv.type() == CV_32F );
hog_detector.clear();

hog_detector.resize(sv.cols + 1);
memcpy(&hog_detector[0], sv.ptr(), sv.cols*sizeof(hog_detector[0]));
hog_detector[sv.cols] = (float)-rho;

}

But I have analyzed it, and the assertion in this code will throw an exception at runtime, since sv_total is not equal to 1 for my SVM model (it is equal to 1009). So I have to get rid of the asserts, but now the problem is which index I should use in getDecisionFunction(0, alpha, svidx), since according to the documentation, 0 is just for one- or two-class models, but mine is a 10-class model. I tried using 45, since there will be 45 different equations to solve, but it raises an error. I tried with 1, 2, and 10, but it never finds the correct digits in my test image.

The code I implemented is the following (I try to use my variable names in Spanish, I love my language jejeje):

// Build the flat detector vector: the first support-vector row followed by -rho.
// NOTE(review): the OpenCV docs say an N-class model has N*(N-1)/2 decision
// functions, so index 10 picks just one pairwise function, and only row 0 of
// the support-vector matrix is copied. The result is presumably not a valid
// linear detector for HOGDescriptor::setSVMDetector — confirm before relying on it.
cv::Mat sv = svm->getSupportVectors();
const int sv_total = sv.rows;

cv::Mat alpha, svidx;
double rho = svm->getDecisionFunction(10, alpha, svidx);

// Range-construct from the first row, then append the negated offset.
vector<float> svmDetector(sv.ptr<float>(0), sv.ptr<float>(0) + sv.cols);
svmDetector.push_back(static_cast<float>(-rho));

And I use the result, svmDetector, in this way with the rest:

// Install the detector coefficients and scan the test image; the third
// argument (2) is passed as detectMultiScale's hit threshold.
hog.setSVMDetector(svmDetector);
hog.detectMultiScale(imagenPrueba, encontrados, 2);
cout << encontrados.size() << endl;

// Merge three copies of the test image into one 3-channel image so the
// detections can be drawn with a colour scalar.
vector<cv::Mat> grises(3, imagenPrueba);
cv::Mat resultadoImagen;
cv::merge(grises, resultadoImagen);

// Outline every detection with Scalar(0, 0, 255).
for (const cv::Rect& marco : encontrados)
    cv::rectangle(resultadoImagen, marco, cv::Scalar(0, 0, 255));

// Show the annotated image and block until a key is pressed.
cv::imshow("RESULTADO", resultadoImagen);
cv::waitKey(0);

I have read a lot about it, but still cannot figure out how to do this properly. I have actually found some examples, but they are for 2-class SVMs, and it seems that extrapolating the idea from 2 classes to N classes is not straightforward, at least to me. I would love to upload my code and images, but I have just joined the forum and still can't do it.

I would really appreciate it if someone could give me a hand with this, with ideas that point me down the correct path.

Thanks in advance.

My full code is here:

#include <iostream>
#include "opencv2\imgproc.hpp"
#include "opencv2\highgui.hpp"
#include "opencv2\objdetect.hpp"
#include "opencv2\ml.hpp"
using namespace std;
int main() {

cv::Mat imgOriginal = cv::imread("D://digits.png", cv::IMREAD_GRAYSCALE);

//Cargando letras
vector<cv::Mat> coleccionLetras;
vector<cv::Mat> coleccionLetrasCorregidas;

for (int i = 0; i < imgOriginal.rows; i = i + 20)
    for (int j = 0; j < imgOriginal.cols; j=j+20) {
        cv::Mat letraActual = imgOriginal(cv::Rect(j,i,20,20)).clone();
        coleccionLetras.push_back(letraActual);
    }

//Corrigiendo inclinacion
for (int i = 0; i < coleccionLetras.size(); i++) {
    cv::Moments m = cv::moments(coleccionLetras[i]);
    if (abs(m.mu02) < 1e-2)
        coleccionLetrasCorregidas.push_back(coleccionLetras[i]);
    else {
        cv::Mat warpMat = (cv::Mat_<float>(2, 3) << 1, (float)(m.mu11 / m.mu02), -0.5 * 20 * (float)(m.mu11 / m.mu02), 0, 1, 0);
        cv::Mat imgOut = cv::Mat::zeros(coleccionLetras[i].rows, coleccionLetras[i].cols, coleccionLetras[i].type());
        cv::warpAffine(coleccionLetras[i], imgOut, warpMat, imgOut.size(), cv::WARP_INVERSE_MAP | cv::INTER_LINEAR);
        coleccionLetrasCorregidas.push_back(imgOut);
    }
}

//Cargando informacion para entrenamiento
//Se tiene un total de 10 digitos. Cada digito tiene un total de 500 muestras. Tomaremos las 400 primeras muestras de cada digito para entrenar.
vector<cv::Mat> trainDataSetMat;
vector<cv::Mat> testDataSetMat;
vector<int> trainLabels;
vector<int> testLabels;
for (int digito = 0; digito < 10; digito++)
    for (int muestra = 0; muestra < 400; muestra++) {
        trainDataSetMat.push_back(coleccionLetrasCorregidas[digito*500+muestra]);
        trainLabels.push_back(digito);
    }

for (int digito = 0; digito < 10; digito++)
    for (int muestra = 0; muestra < 100; muestra++) {
        testDataSetMat.push_back(coleccionLetrasCorregidas[digito * 500 + 400 + muestra]);
        testLabels.push_back(digito);
    }

//Obteniendo HOG features para cada dataset
vector<vector<float>> trainDataSetHOG;
vector<vector<float>> testDataSetHOG;

cv::HOGDescriptor hog(
    cv::Size(20, 20),   //winSize
    cv::Size(10, 10),   //blockSize
    cv::Size(5, 5),     //blockStride
    cv::Size(10, 10),   //cellSize
    9,
    1,
    -1.0,
    0,
    0.2,
    0,
    64,
    true
);
for (int i = 0; i<trainDataSetMat.size(); i++) {
    vector<float> HOGfeatures;
    hog.compute(trainDataSetMat[i], HOGfeatures);
    trainDataSetHOG.push_back(HOGfeatures);
}

for (int i = 0; i<testDataSetMat.size(); i++) {
    vector<float> HOGfeatures;
    hog.compute(testDataSetMat[i], HOGfeatures);
    testDataSetHOG.push_back(HOGfeatures);
}

//Convirtiendo a matriz, para entrenamiento
cv::Mat trainMat((int)trainDataSetHOG.size(), (int)trainDataSetHOG[0].size(), CV_32FC1);
cv::Mat testMat((int)testDataSetHOG.size(), (int)testDataSetHOG[0].size(), CV_32FC1);

for (int i = 0; i < trainMat.rows; i++)
    for (int j = 0; j < trainMat.cols; j++)
        trainMat.at<float>(i,j) = trainDataSetHOG[i][j];

for (int i = 0; i < testMat.rows; i++)
    for (int j = 0; j < testMat.cols; j++)
        testMat.at<float>(i, j) = testDataSetHOG[i][j];

cv::Mat respuesta;

cv::Ptr<cv::ml::SVM> svm = cv::ml::SVM::create();
svm->setGamma(0.50625);
svm->setC(12.5);
svm->setKernel(cv::ml::SVM::RBF);
svm->setType(cv::ml::SVM::C_SVC);
cv::Ptr<cv::ml::TrainData> td = cv::ml::TrainData::create(trainMat, cv::ml::ROW_SAMPLE, trainLabels);
svm->train(td);
svm->save("modelo.yml");
svm->predict(testMat, respuesta);

float conteo = 0.0f;
float precision = 0.0f;
for (int i = 0; i < respuesta.rows; i++) {
    if (respuesta.at<float>(i, 0) == testLabels[i])
        conteo++;
}

precision = (conteo / respuesta.rows) * 100.0f;
cout << "Precision " << precision << endl;

cv::Mat imagenPrueba = cv::imread("D://TEST_FINAL.jpg", cv::IMREAD_GRAYSCALE);

vector<cv::Rect> encontrados;

cv::Mat sv = svm->getSupportVectors();
const int sv_total = sv.rows;
cv::Mat alpha, svidx;
double rho = svm->getDecisionFunction(10, alpha, svidx);
vector<float> svmDetector;
svmDetector.clear();
svmDetector.resize(sv.cols + 1);
memcpy(&svmDetector[0], sv.ptr(), sv.cols * sizeof(svmDetector[0]));
svmDetector[sv.cols] = (float)-rho;

hog.setSVMDetector(svmDetector);
hog.detectMultiScale(imagenPrueba, encontrados, 2);
cout << encontrados.size() << endl;
cv::Mat resultadoImagen;
vector<cv::Mat> grises;
grises.push_back(imagenPrueba);
grises.push_back(imagenPrueba);
grises.push_back(imagenPrueba);
cv::merge(grises, resultadoImagen);
for(int i=0;i<encontrados.size();i++)
    cv::rectangle(resultadoImagen, encontrados[i], cv::Scalar(0, 0, 255));

cv::imshow("RESULTADO", resultadoImagen);
cv::waitKey(0);

return 0;

}