To normalize data using the zero-mean, unit-standard-deviation approach (i.e. replacing each feature value x by (x - mean) / sigma, so that every feature ends up with zero mean and unit standard deviation), this is what I use in my programs (there are probably better/more optimized ways):

· For training

        //Mat train_features has one descriptor vector per row (corresponding to one sample), and as many rows as samples in the training dataset
        Mat means, sigmas;  //matrices to save all the means and standard deviations
        for (int i = 0; i < train_features.cols; i++){  //take each of the features in the vector
            Mat mean; Mat sigma;
            meanStdDev(train_features.col(i), mean, sigma);  //get mean and std deviation
            means.push_back(mean);
            sigmas.push_back(sigma);
            train_features.col(i) = (train_features.col(i) - mean) / sigma;  //normalization
        }
        //optional steps to save all the parameters
        Mat meansigma;
        hconcat(means, sigmas, meansigma);  //both params in same matrix
        saveMatToCsv(meansigma, "meansigma.csv");  //custom function to save data to .csv file
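
One caveat worth mentioning: if a feature is constant across the whole training set, its sigma is 0 and the division produces NaN/Inf. A minimal guard you could add right after the meanStdDev() call inside the loop (my addition, not part of the original snippet):

    //guard against constant features (sigma == 0), which would cause division by zero
    if (sigma.at<double>(0) < 1e-12)
        sigma.at<double>(0) = 1.0;  //leave constant features unscaled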

· For detection/testing (because you have to apply normalization there too)

    //load previously saved means and sigmas (initialization, needed just once)
    Mat meansigma;
    string file = "meansigma.csv";
    loadCsv(file, meansigma);
    Mat means = meansigma.col(0).clone();
    Mat sigmas = meansigma.col(1).clone();

    //inside your for loop, for each frame
    vector<float> descriptors = computeDescriptors();  //change this function appropriately
    //normalize descriptors prior to classification
    for (int idx = 0; idx < (int)descriptors.size(); idx++){
        float mean = means.at<float>(idx);
        float sigma = sigmas.at<float>(idx);
        descriptors[idx] = (descriptors[idx] - mean) / sigma;  //normalize vector
    }

Yes, the testing part might seem inefficient with such a loop instead of Mat and its overloaded operators. I had my reasons to write it that way when I needed it, and I haven't reviewed it lately... everybody's welcome to improve it. However, for the purposes of the current question, I think it's clearer this way too.
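
For reference, a possible Mat-based version of that normalization loop (an untested sketch, assuming means and sigmas are Nx1 CV_32F columns as loaded above):

    Mat desc(descriptors);       //N x 1 CV_32F header sharing the vector's memory (no copy)
    desc -= means;               //element-wise subtraction, in place
    divide(desc, sigmas, desc);  //element-wise division, written back in place
    //since desc shares memory with 'descriptors', the vector now holds the normalized values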

About the saveMatToCsv() and loadCsv() functions: they're just my own custom functions to write to and read from a .csv file. Check this post for more info about them.
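
In case it helps, loadCsv() could look something along these lines (a sketch based on CvMLData from the OpenCV 2.4 ML module, not necessarily the exact code from that post):

    void loadCsv(const string& filename, Mat& matrix){
        CvMLData mlData;
        mlData.read_csv(filename.c_str());          //parse the .csv file
        const CvMat* values = mlData.get_values();  //CV_32F data owned by mlData
        Mat(values, true).copyTo(matrix);           //deep copy before mlData is destroyed
    }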

UPDATE - complete dummy sample (working without any problems in OpenCV 2.4.12, Win7 x64, VS 2013)

#include <opencv2/core/core.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <fstream>

using namespace cv;
using namespace std;

void saveMatToCsv(Mat &matrix, string filename){
    ofstream outputFile(filename);
    outputFile << format(matrix, "CSV") << endl;
    outputFile.close();
}

int main()
{
    //training data and labels ------------------
    //note: 9 samples of 4 features each, so the matrix is 9x4 and there are 9 labels
    Mat train_features = (Mat_<float>(9, 4) <<
                                            1500, 25, -9, 6,
                                            1495, 31, -8, 8,
                                            1565, 30, -8, 7,
                                            1536, 28, -10, 8,
                                            1504, 29, -4, 6,
                                            2369, 87, 15, 69,
                                            526, 2, 47, 2,
                                            8965, 45, 25, 14,
                                            4500, 14, 36, 8);

    Mat labels = (Mat_<int>(9, 1) << 1, 1, 1, 1, 1, -1, -1, -1, -1);

    //normalizing data --------------------------
    Mat means, sigmas;  //matrices to save all the means and standard deviations
    for (int i = 0; i < train_features.cols; i++){  //take each of the features in the vector
        Mat mean; Mat sigma;
        meanStdDev(train_features.col(i), mean, sigma);  //get mean and std deviation
        means.push_back(mean);
        sigmas.push_back(sigma);
        train_features.col(i) = (train_features.col(i) - mean) / sigma;  //normalization
    }
    //optional steps to save all the parameters
    Mat meansigma;
    hconcat(means, sigmas, meansigma);  //both params in same matrix
    saveMatToCsv(meansigma, "meansigma.csv");  //custom function to save data to .csv file

    //training SVM --------------------
    SVM svm;
    svm.train(train_features, labels);
    svm.save("svm.xml");

    //loading previously saved SVM and predicting ------------------
    SVM new_svm;
    new_svm.load("svm.xml");

    Mat new_sample1 = (Mat_<float>(4, 1) << 1520, 26, -9, 7);   //seems like a positive
    Mat new_sample2 = (Mat_<float>(4, 1) << 325, 57, 14, 36);   //seems like a negative

    //load saved means and sigmas (the CvMat returned by get_values() is owned by
    //mlData, so deep-copy it instead of releasing it manually)
    CvMLData mlData;
    mlData.read_csv("meansigma.csv");
    Mat new_meansigma(mlData.get_values(), true);   //copyData = true
    Mat new_means = new_meansigma.col(0).clone();
    Mat new_sigmas = new_meansigma.col(1).clone();

    //normalize samples prior to classification (overloaded Mat operators this time)
    new_sample1 = (new_sample1 - new_means) / new_sigmas;
    new_sample2 = (new_sample2 - new_means) / new_sigmas;

    double predictedClass1 = new_svm.predict(new_sample1);
    double predictedClass2 = new_svm.predict(new_sample2);
    cout << "First sample's class: " << predictedClass1 << endl;
    cout << "Second sample's class: " << predictedClass2 << endl;

    return 0;
}

Results: First sample's class: 1, Second sample's class: -1
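
As a quick sanity check (my addition, not in the original sample), you can verify the normalization inside main() right after the loop: every column of train_features should now have mean ~0 and sigma ~1:

    for (int i = 0; i < train_features.cols; i++){
        Mat m, s;
        meanStdDev(train_features.col(i), m, s);
        cout << "feature " << i << ": mean = " << m.at<double>(0)
             << ", sigma = " << s.at<double>(0) << endl;
    }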