1 | initial version |
after loading your csv data, you can query your data & labels from the TrainData object (you don't need to make a second one)
// method 1, use it "as is", it returns an instance ! :
// (for ANN, NBayes, or LogisticRegression, using float labels)
Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",0,1,10);
ml->train(tdata);
// method 2, if you want to split into train/test sets for validation:
Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",0,1,10);
tdata->setTrainTestSplit(0.7); // 30% kept for testing
Mat trainData = tdata->getTrainSamples();
Mat trainLabels = tdata->getTrainResponses();
trainLabels.convertTo(trainLabels, CV_32S); // needed for SVM, KNearest
ml->train(trainData, 0, trainLabels);
Mat testData = tdata->getTestSamples(); Mat testLabels = tdata->getTestResponses(); Mat testResults; ml->predict(testData, testResults); float accuracy = float(countNonZero(testResults == testLabels)) / testLabels.rows;
2 | No.2 Revision |
after loading your csv data, you can query your data & labels from the TrainData object (you don't need to make a second one)
// method 1, use it "as is", it returns an instance ! :
// (for ANN, NBayes, or LogisticRegression, using float labels)
Ptr<TrainData>
3 | No.3 Revision |
after loading your csv data, you can query your data & labels from the TrainData object (you don't need to make a second one)
// method 1, use it "as is", it returns an instance ! :
// (for ANN, NBayes, or LogisticRegression, using float labels)
Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",
0, // lines to skip
0, // 1st elem is the label
-1); // only 1 response per line
ml->train(tdata);
// method 2, if you want to split into train/test sets for validation:
Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",0,0,-1);
tdata->setTrainTestSplit(0.7); // 30% kept for testing
Mat trainData = tdata->getTrainSamples();
Mat trainLabels = tdata->getTrainResponses();
trainLabels.convertTo(trainLabels, CV_32S); // needed for SVM, KNearest
ml->train(trainData, 0, trainLabels);
Mat testData = tdata->getTestSamples();
Mat testLabels = tdata->getTestResponses();
Mat testResults;
ml->predict(testData, testResults);
float accuracy = float(countNonZero(testResults == testLabels)) / testLabels.rows;
4 | No.4 Revision |
After loading your csv data, you can query your data & labels from the TrainData object (you don't need to make a second one). Note that TrainData::loadFromCSV returns an instance:
// method 1:
Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",
0, // lines to skip
0, // 1st elem is the label
-1); // only 1 response per line
for NBayes or LogisticRegression, using float labels, you could use it "as is":
ml->train(tdata);
// method 2, if you want to split into train/test sets for validation:
Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",0,0,-1);
tdata->setTrainTestSplit(0.7); // 30% kept for testing
for an SVM or KNearest, you'd need to convert the responses to integer:
Mat trainData = tdata->getTrainSamples();
Mat trainLabels = tdata->getTrainResponses();
trainLabels.convertTo(trainLabels, CV_32S); // needed for SVM, KNearest
svm->train(trainData, 0, trainLabels);
//
// for an ANN, you need to "one-hot encode" the labels:
// for 5 classes, and a label of 2, it looks like:
// [0,0,1,0,0]
// (this is the expected state of the output neurons)
//
Mat trainData = tdata->getTrainSamples();
Mat trainLabels = tdata->getTrainResponses();
int numClasses = 10; // assuming mnist
Mat hot(trainLabels.rows, numClasses, CV_32F, 0.0f); // all zero, initially
for (int i=0; i<trainLabels.rows; i++) {
int id = (int)trainLabels.at<float>(i);
hot.at<float>(i, id) = 1.0f;
}
ann->train(trainData, 0, hot);
for your test set, just repeat the same steps:
Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/test.csv", 0,0,-1);
// no train/test split was set on this TrainData, so the "train" getters are used to fetch the loaded rows
Mat testData = tdata->getTrainSamples();
Mat testLabels = tdata->getTrainResponses();
Mat testResults;
ann->predict(testData, testResults);
float accuracy = float(countNonZero(testResults == testLabels)) / testLabels.rows;