Hello,
I am trying to use a random forest on a mixed data set containing both continuous and categorical features, but I do not understand how to call the predict function with one of these samples.
Find the data format below:
39, State-gov, 77516, Bachelors, 13, Never-married, Adm-clerical, Not-in-family, White, Male, 2174, 0, 40, United-States, <=50K
I have 35000 records in the data-set.
Please find the code below:
#include<opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core.hpp>
#include <opencv2/ml.hpp>
#include<iostream>
using namespace std;
using namespace cv;
using namespace cv::ml;
int main()
{
cout << "Loading Data..." << endl;
Ptr<TrainData> raw_data = TrainData::loadFromCSV("C:/mlpack/samples/mlpack/sample-ml-app/sample-ml-app/data/real.csv", 0, -1, -1, "ord[0,2,4,10-12]cat[1,3,5-9,13-14]", ',');
Mat data = raw_data->getSamples();
Mat labels = raw_data->getResponses();
auto rtrees = RTrees::create();
rtrees->setMaxDepth(10);
rtrees->setMinSampleCount(2);
rtrees->setUseSurrogates(false);
rtrees->setMaxCategories(2);
rtrees->setCalculateVarImportance(false);
rtrees->setActiveVarCount(0);
rtrees->setTermCriteria({ cv::TermCriteria::MAX_ITER, 100, 0 });
cout << "Training Model..." << endl;
rtrees->train(data, cv::ml::ROW_SAMPLE, labels);
cout << "Saving Model..." << endl;
rtrees->save("rt_classifier.xml");
cout << "Loading Model..." << endl;
auto rtrees2 = cv::ml::RTrees::create();
cv::FileStorage read("rt_classifier.xml", cv::FileStorage::READ);
rtrees2->read(read.root());
//rtrees2->predict();
return 0;
}
Sample to predict:
53, Private, 144361, HS-grad, 9, Married-civ-spouse, Machine-op-inspct, Husband, White, Male, 0, 0, 38, United-States
Could anyone help me format this sample so that it can be passed to predict()?
Thanks in advance.