Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

Random Forest with categorical features.

Hello,

I am trying to use a random forest on mixed data with continuous and categorical features, but I do not understand how to use the predict function with one of these samples.

Find the data format below:

39, State-gov, 77516, Bachelors, 13, Never-married, Adm-clerical, Not-in-family, White, Male, 2174, 0, 40, United-States, <=50K

I have 35000 records in the data-set.

Please find the code below:

#include<opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core.hpp>
#include <opencv2/ml.hpp>
#include<iostream>

using namespace std;
using namespace cv;
using namespace cv::ml;

int main()
{
        cout << "Loading Data..." << endl;
        Ptr<TrainData> raw_data = TrainData::loadFromCSV("C:/mlpack/samples/mlpack/sample-ml-app/sample-ml-app/data/real.csv", 0, -1, -1, "ord[0,2,4,10-12]cat[1,3,5-9,13-14]", ',');
        Mat data = raw_data->getSamples();
        Mat labels = raw_data->getResponses();

        auto rtrees = RTrees::create();
        rtrees->setMaxDepth(10);
    rtrees->setMinSampleCount(2);
    rtrees->setUseSurrogates(false);
    rtrees->setMaxCategories(2);
    rtrees->setCalculateVarImportance(false);
    rtrees->setActiveVarCount(0);
    rtrees->setTermCriteria({ cv::TermCriteria::MAX_ITER, 100, 0 });
    cout << "Training Model..." << endl;
    rtrees->train(data, cv::ml::ROW_SAMPLE, labels);
    cout << "Saving Model..." << endl;
    rtrees->save("rt_classifier.xml");

    cout << "Loading Model..." << endl;
    auto rtrees2 = cv::ml::RTrees::create();

    cv::FileStorage read("rt_classifier.xml", cv::FileStorage::READ);
    rtrees2->read(read.root());

    //rtrees2->predict();

return 0;

}

Sample to predict:

53, Private, 144361, HS-grad, 9, Married-civ-spouse, Machine-op-inspct, Husband, White, Male, 0, 0, 38, United-States

Can I get any help formatting the data to feed to predict()?

Thanks in advance.

Random Forest with categorical features.

Hello,

I am trying to use a random forest on mixed data with continuous and categorical features, but I do not understand how to use the predict function with one of these samples.

Find the data format below:

39, State-gov, 77516, Bachelors, 13, Never-married, Adm-clerical, Not-in-family, White, Male, 2174, 0, 40, United-States, <=50K

I have 35000 records in the data-set.

Please find the code below:

#include<opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core.hpp>
#include <opencv2/ml.hpp>
#include<iostream>

using namespace std;
using namespace cv;
using namespace cv::ml;

int main()
{
        cout << "Loading Data..." << endl;
        Ptr<TrainData> raw_data = TrainData::loadFromCSV("C:/mlpack/samples/mlpack/sample-ml-app/sample-ml-app/data/real.csv", 0, -1, -1, "ord[0,2,4,10-12]cat[1,3,5-9,13-14]", ',');
        Mat data = raw_data->getSamples();
        Mat labels = raw_data->getResponses();

        auto rtrees = RTrees::create();
        rtrees->setMaxDepth(10);
    rtrees->setMinSampleCount(2);
    rtrees->setUseSurrogates(false);
    rtrees->setMaxCategories(2);
    rtrees->setCalculateVarImportance(false);
    rtrees->setActiveVarCount(0);
    rtrees->setTermCriteria({ cv::TermCriteria::MAX_ITER, 100, 0 });
    cout << "Training Model..." << endl;
    rtrees->train(data, cv::ml::ROW_SAMPLE, labels);
    cout << "Saving Model..." << endl;
    rtrees->save("rt_classifier.xml");

    cout << "Loading Model..." << endl;
    auto rtrees2 = cv::ml::RTrees::create();

    cv::FileStorage read("rt_classifier.xml", cv::FileStorage::READ);
    rtrees2->read(read.root());

    //rtrees2->predict();

return 0;

}

Sample to predict:

53, Private, 144361, HS-grad, 9, Married-civ-spouse, Machine-op-inspct, Husband, White, Male, 0, 0, 38, United-States

Can I get any help formatting the data to feed to predict()?

Thanks in advance.

Random Forest with categorical features.

Hello,

I am trying to use a random forest on mixed data with continuous and categorical features, but I do not understand how to use the predict function with one of these samples.

Find the data format below:

39, State-gov, 77516, Bachelors, 13, Never-married, Adm-clerical, Not-in-family, White, Male, 2174, 0, 40, United-States, <=50K

I have 35000 records in the data-set.

Please find the code below:

#include<opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core.hpp>
#include <opencv2/ml.hpp>
#include<iostream>

using namespace std;
using namespace cv;
using namespace cv::ml;

int main()
{
        cout << "Loading Data..." << endl;
        Ptr<TrainData> raw_data = TrainData::loadFromCSV("C:/mlpack/samples/mlpack/sample-ml-app/sample-ml-app/data/real.csv", TrainData::loadFromCSV("real.csv", 0, -1, -1, "ord[0,2,4,10-12]cat[1,3,5-9,13-14]", ',');
        Mat data = raw_data->getSamples();
        Mat labels = raw_data->getResponses();

        auto rtrees = RTrees::create();
        rtrees->setMaxDepth(10);
    rtrees->setMinSampleCount(2);
    rtrees->setUseSurrogates(false);
    rtrees->setMaxCategories(2);
    rtrees->setCalculateVarImportance(false);
    rtrees->setActiveVarCount(0);
    rtrees->setTermCriteria({ cv::TermCriteria::MAX_ITER, 100, 0 });
    cout << "Training Model..." << endl;
    rtrees->train(data, cv::ml::ROW_SAMPLE, labels);
    cout << "Saving Model..." << endl;
    rtrees->save("rt_classifier.xml");

    cout << "Loading Model..." << endl;
    auto rtrees2 = cv::ml::RTrees::create();

    cv::FileStorage read("rt_classifier.xml", cv::FileStorage::READ);
    rtrees2->read(read.root());

    //rtrees2->predict();

return 0;

}

Sample to predict:

53, Private, 144361, HS-grad, 9, Married-civ-spouse, Machine-op-inspct, Husband, White, Male, 0, 0, 38, United-States

Can I get any help formatting the data to feed to predict()?

Thanks in advance.