1 | initial version |
To normalize data to zero mean and unit standard deviation (z-score standardization), this is what I use in my programs (there are probably better or more optimized ways):
· For training
//Mat train_features has one descriptor vector per row (corresponding to one sample), and as many rows as samples in the training dataset
Mat means, sigmas; //matrices to save all the means and standard deviations
for (int i = 0; i < train_features.cols; i++){ //take each of the features in the vector
Mat mean; Mat sigma;
meanStdDev(train_features.col(i), mean, sigma); //get mean and std deviation
means.push_back(mean);
sigmas.push_back(sigma);
train_features.col(i) = (train_features.col(i) - mean) / sigma; //normalization
}
//optional steps to save all the parameters
Mat meansigma;
hconcat(means, sigmas, meansigma); //both params in same matrix
saveMatToCsv(meansigma, "meansigma.csv"); //custom function to save data to .csv file
· For detection/testing (because you have to apply normalization there too)
//load previously saved means and sigmas (initialization, needed just once)
Mat meansigma;
string file = "meansigma.csv";
loadCsv(file, meansigma);
Mat means = meansigma.col(0).clone();
Mat sigmas = meansigma.col(1).clone();
//inside your for loop, for each frame
vector<float> descriptors = computeDescriptors(); //change function appropiately
//normalize descriptors prior to classification
for (int idx = 0; idx < descriptors.size(); idx++){
float mean = means.at<float>(idx);
float sigma = sigmas.at<float>(idx);
descriptors[idx] = (descriptors[idx] - mean) / sigma; //normalize vector
}
Yes, the testing part might seem inefficient, using such a loop instead of Mat and its overloaded operators. I had my reasons to write it that way when I needed it, and I haven't reviewed it lately... everybody's welcome to improve it. However, for the purposes of the current question, I think it is clearer this way too.
About the saveMatToCsv() and loadCsv() functions: they're just my own custom functions to write to and read from a .csv file. Check this post for more info about them.
2 | No.2 Revision |
To normalize data to zero mean and unit standard deviation (z-score standardization), this is what I use in my programs (there are probably better or more optimized ways):
· For training
//Mat train_features has one descriptor vector per row (corresponding to one sample), and as many rows as samples in the training dataset
Mat means, sigmas; //matrices to save all the means and standard deviations
for (int i = 0; i < train_features.cols; i++){ //take each of the features in the vector
Mat mean; Mat sigma;
meanStdDev(train_features.col(i), mean, sigma); //get mean and std deviation
means.push_back(mean);
sigmas.push_back(sigma);
train_features.col(i) = (train_features.col(i) - mean) / sigma; //normalization
}
//optional steps to save all the parameters
Mat meansigma;
hconcat(means, sigmas, meansigma); //both params in same matrix
saveMatToCsv(meansigma, "meansigma.csv"); //custom function to save data to .csv file
· For detection/testing (because you have to apply normalization there too)
//load previously saved means and sigmas (initialization, needed just once)
Mat meansigma;
string file = "meansigma.csv";
loadCsv(file, meansigma);
Mat means = meansigma.col(0).clone();
Mat sigmas = meansigma.col(1).clone();
//inside your for loop, for each frame
vector<float> descriptors = computeDescriptors(); //change function appropiately
//normalize descriptors prior to classification
for (int idx = 0; idx < descriptors.size(); idx++){
float mean = means.at<float>(idx);
float sigma = sigmas.at<float>(idx);
descriptors[idx] = (descriptors[idx] - mean) / sigma; //normalize vector
}
Yes, the testing part might seem inefficient, using such a loop instead of Mat and its overloaded operators. I had my reasons to write it that way when I needed it, and I haven't reviewed it lately... everybody's welcome to improve it. However, for the purposes of the current question, I think it is clearer this way too.
About the saveMatToCsv() and loadCsv() functions: they're just my own custom functions to write to and read from a .csv file. Check this post for more info about them.
3 | No.3 Revision |
To normalize data to zero mean and unit standard deviation (z-score standardization), this is what I use in my programs (there are probably better or more optimized ways):
· For training
//Mat train_features has one descriptor vector per row (corresponding to one sample), and as many rows as samples in the training dataset
Mat means, sigmas; //matrices to save all the means and standard deviations
for (int i = 0; i < train_features.cols; i++){ //take each of the features in vector
Mat mean; Mat sigma;
meanStdDev(train_features.col(i), mean, sigma); //get mean and std deviation
means.push_back(mean);
sigmas.push_back(sigma);
train_features.col(i) = (train_features.col(i) - mean) / sigma; //normalization
}
//optional steps to save all the parameters
Mat meansigma;
hconcat(means, sigmas, meansigma); //both params in same matrix
saveMatToCsv(meansigma, "meansigma.csv"); //custom function to save data to .csv file
· For detection/testing (because you have to apply normalization there too)
//load previously saved means and sigmas (initialization, needed just once)
Mat meansigma;
string file = "meansigma.csv";
loadCsv(file, meansigma);
Mat means = meansigma.col(0).clone();
Mat sigmas = meansigma.col(1).clone();
//inside your for loop, for each frame
vector<float> descriptors = computeDescriptors(); //change function appropiately
//normalize descriptors prior to classification
for (int idx = 0; idx < descriptors.size(); idx++){
float mean = means.at<float>(idx);
float sigma = sigmas.at<float>(idx);
descriptors[idx] = (descriptors[idx] - mean) / sigma; //normalize vector
}
Yes, the testing part might seem inefficient, using such a loop instead of Mat and its overloaded operators. I had my reasons to write it that way when I needed it, and I haven't reviewed it lately... everybody's welcome to improve it. However, for the purposes of the current question, I think it is clearer this way too.
About the saveMatToCsv() and loadCsv() functions: they're just my own custom functions to write to and read from a .csv file. Check this post for more info about them.
UPDATE - complete dummy sample (working without any problems in OpenCV 2.4.12, Win7 x64, VS 2013)
#include <opencv2/core/core.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <fstream>
using namespace cv;
using namespace std;
void saveMatToCsv(Mat &matrix, string filename){
ofstream outputFile(filename);
outputFile << format(matrix, "CSV") << endl;
outputFile.close();
}
// Complete dummy sample: normalizes a small training set, saves the normalization
// parameters, trains and saves an SVM, then reloads both and classifies two new samples.
// Returns 0 on success, 1 if the saved normalization parameters cannot be read back.
int main()
{
    //training data and labels ------------------
    // One sample per row, four features per sample. The declared row count has to equal
    // the number of rows written out below (and the number of labels); rows that get no
    // values from the comma initializer would otherwise be left unset.
    Mat train_features = (Mat_<float>(9, 4) <<
        1500, 25, -9, 6,
        1495, 31, -8, 8,
        1565, 30, -8, 7,
        1536, 28, -10, 8,
        1504, 29, -4, 6,
        2369, 87, 15, 69,
        526, 2, 47, 2,
        8965, 45, 25, 14,
        4500, 14, 36, 8);
    // One label per sample: the first five rows are class 1, the remaining four class -1.
    Mat labels = (Mat_<int>(9, 1) << 1, 1, 1, 1, 1, -1, -1, -1, -1);

    //normalizing data --------------------------
    Mat means, sigmas; //matrices to save all the means and standard deviations
    for (int i = 0; i < train_features.cols; i++){ //take each of the features in vector
        Mat mean; Mat sigma;
        meanStdDev(train_features.col(i), mean, sigma); //get mean and std deviation
        means.push_back(mean);
        sigmas.push_back(sigma);
        train_features.col(i) = (train_features.col(i) - mean) / sigma; //normalization
    }

    //optional steps to save all the parameters
    Mat meansigma;
    hconcat(means, sigmas, meansigma); //both params in same matrix: column 0 = means, column 1 = sigmas
    saveMatToCsv(meansigma, "meansigma.csv"); //custom function to save data to .csv file

    //training SVM --------------------
    SVM svm;
    svm.train(train_features, labels);
    svm.save("svm.xml");

    //loading previously saved SVM and predicting ------------------
    SVM new_svm;
    new_svm.load("svm.xml");
    // New samples are column vectors so they line up element-by-element with the
    // column of means and the column of sigmas loaded below.
    Mat new_sample1 = (Mat_<float>(4, 1) << 1520, 26, -9, 7); //seems like a positive
    Mat new_sample2 = (Mat_<float>(4, 1) << 325, 57, 14, 36); //seems like a negative

    //load saved means and sigmas
    CvMLData mlData;
    if (mlData.read_csv("meansigma.csv") != 0){ // read_csv reports success with 0
        cerr << "Could not read meansigma.csv" << endl;
        return 1;
    }
    const CvMat* tmp = mlData.get_values();
    // Deep-copy the values (copyData = true). The CvMat behind tmp stays owned by mlData,
    // so it must not be destroyed or released here.
    Mat new_meansigma(tmp, true);
    Mat new_means = new_meansigma.col(0).clone();
    Mat new_sigmas = new_meansigma.col(1).clone();

    //normalize descriptors prior to classification (overloaded functions this time)
    new_sample1 = (new_sample1 - new_means) / new_sigmas;
    new_sample2 = (new_sample2 - new_means) / new_sigmas;

    double predictedClass1 = new_svm.predict(new_sample1);
    double predictedClass2 = new_svm.predict(new_sample2);
    cout << "First sample's class: " << predictedClass1 << endl;
    cout << "Second sample's class: " << predictedClass2 << endl;
    return 0;
}
Results: First sample's class: 1, Second sample's class: -1