Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

after loading your csv data, you can query your data & labels from the TrainData object (you don't need to make a second one)

// method 1, use it "as is", it returns an instance ! :
// (for ANN, NBayes, or LogisticRegression, using float labels)
Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",0,1,10);
ml->train(tdata);

// method 2, if you want to split into train/test sets for validation:

Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",0,1,10);
tdata->setTrainTestSplit(0.7); // 30% kept for testing

Mat trainData = tdata->getTrainSamples();
Mat trainLabels = tdata->getTrainResponses();
trainLabels.convertTo(trainLabels, CV_32S); // needed for SVM, KNearest
ml->train(trainData, 0, trainLabels);

Mat testData = tdata->getTestSamples();
Mat testLabels = tdata->getTestResponses();
Mat testResults;
ml->predict(testData, testResults);
float accuracy = float(countNonZero(testResults == testLabels)) / testLabels.rows;

after loading your csv data, you can query your data & labels from the TrainData object (you don't need to make a second one)

// method 1, use it "as is", it returns an instance ! :
// (for ANN, NBayes, or LogisticRegression, using float labels)
Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",0,1,10);
ml->train(tdata);

// method 2, if you want to split into train/test sets for validation:

Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",0,1,10);
tdata->setTrainTestSplit(0.7); // 30% kept for testing

Mat trainData = tdata->getTrainSamples();
Mat trainLabels = tdata->getTrainResponses();
trainLabels.convertTo(trainLabels, CV_32S); // needed for SVM, KNearest
ml->train(trainData, 0, trainLabels);

Mat testData = tdata->getTestSamples();
Mat testLabels = tdata->getTestResponses();
Mat testResults;
ml->predict(testData, testResults);
float accuracy = float(countNonZero(testResults == testLabels)) / testLabels.rows;

after loading your csv data, you can query your data & labels from the TrainData object (you don't need to make a second one)

// method 1, use it "as is", it returns an instance ! :
// (for ANN, NBayes, or LogisticRegression, using float labels)
Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",
       0, // lines to skip
       0, // 1st elem is the label
      -1); // only 1 response per line
ml->train(tdata);

// method 2, if you want to split into train/test sets for validation:

Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",0,0,-1);
tdata->setTrainTestSplit(0.7); // 30% kept for testing

Mat trainData = tdata->getTrainSamples();
Mat trainLabels = tdata->getTrainResponses();
trainLabels.convertTo(trainLabels, CV_32S); // needed for SVM, KNearest
ml->train(trainData, 0, trainLabels);

Mat testData = tdata->getTestSamples();
Mat testLabels = tdata->getTestResponses();
Mat testResults;
ml->predict(testData, testResults);
float accuracy = float(countNonZero(testResults == testLabels)) / testLabels.rows;

After loading your csv data, you can query your data & labels from the TrainData object (you don't need to make a second one). Note that TrainData::loadFromCSV returns an instance:

Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",
       0, // lines to skip
       0, // 1st elem is the label
      -1); // only 1 response per line

for NBayes, or LogisticRegression, using float labels, you could use it "as is":
ml->train(tdata);

// method 2, if you want to split into train/test sets for validation:

Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/train.csv",0,0,-1);
tdata->setTrainTestSplit(0.7); // 30% kept for testing

for an SVM or KNearest, you'd need to convert the responses to integer:
Mat trainData = tdata->getTrainSamples();
Mat trainLabels = tdata->getTrainResponses();
trainLabels.convertTo(trainLabels, CV_32S); // needed for SVM, KNearest
svm->train(trainData, 0, trainLabels);

//
// for an ANN, you need to "one-hot encode" the labels:
// for 5 classes, and a label of 2, it looks like:
// [0,0,1,0,0] 
// (this is the expected state of the output neurons)
//
Mat trainData = tdata->getTrainSamples();
Mat trainLabels = tdata->getTrainResponses();
int numClasses = 10; // assuming mnist
Mat hot(trainLabels.rows, numClasses, CV_32F, 0.0f); // all zero, initially
for (int i=0; i<trainLabels.rows; i++) {
        int id = (int)trainLabels.at<float>(i);
        hot.at<float>(i, id) = 1.0f; 
}
ann->train(trainData, 0, hot);

for your test set, just repeat the same steps:

Ptr<TrainData> tdata = TrainData::loadFromCSV("C:/Users/Shreya Srivastava/Desktop/test.csv", 0,0,-1);
Mat testData = tdata->getTrainSamples();
Mat testLabels = tdata->getTrainResponses();
Mat testResults;
ann->predict(testData, testResults);
float accuracy = float(countNonZero(testResults == testLabels)) / testLabels.rows;