Revision history [back]

OpenCV Neural Network Sigmoid Output

I have been using OpenCV for a quite time. I decided to check its power for Machine Learning lately. So I ended up with implementing a neural network for face recognition. To summarize my strategy for face recognition :

Read images from a csv of some face database.
Roll images to a Mat array row wise.
Apply PCA for dimensionality reduction.
Use projections of PCA to train the network.
Predict the test data using the trained network.

So everything was OK until the prediction stage. I was using the max responsed output unit to classify the face. So normally OpenCV's sigmoid implementation should give values in range of -1 to 1 which is stated at the docs. 1 is the max closure to class. After I got nearly 0 accuracy I checked the output responses for each class for each test data. I was suprised with the values : 14.53, -1.7 , #IND . If sigmoid was applied, how could i get these values ? Where am i doing wrong ?

To help you understand the matter and for the ones wondering how to apply PCA and use it with NN I m sharing my code :

Reading csv:

void read_csv(const string& filename, vector<mat>& images, vector<int>& labels, char separator = ';') 
{
    std::ifstream file(filename.c_str(), ifstream::in);
    if (!file) 
    {
        string error_message = "No valid input file was given, please check the given filename.";
        CV_Error(1, error_message);
    }
    string line, path, classlabel;
    while (getline(file, line)) 
    {
        stringstream liness(line);

        getline(liness, path, separator);
        getline(liness, classlabel);

        if(!path.empty() && !classlabel.empty()) 
        {
            Mat im = imread(path, 0);

            images.push_back(im);
            labels.push_back(atoi(classlabel.c_str()));
        }
    }
}

Rolling images row by row :

Mat rollVectortoMat(const vector<Mat> &data)
{
   Mat dst(static_cast<int>(data.size()), data[0].rows*data[0].cols, CV_32FC1);
   for(unsigned int i = 0; i < data.size(); i++)
   {
      Mat image_row = data[i].clone().reshape(1,1);
      Mat row_i = dst.row(i);                                       
      image_row.convertTo(row_i,CV_32FC1, 1/255.);
   }
   return dst;
}

Converting vector of labels to Mat of labels

Mat getLabels(const vector<int> &data,int classes = 20)
{
    Mat labels(data.size(),classes,CV_32FC1);

    for(int i = 0; i <data.size() ; i++)
    {
        int cls = data[i] - 1;  
        labels.at<float>(i,cls) = 1.0;  
    }

    return labels;
}

MAIN

int main()
{

    PCA pca;

    vector<Mat> images_train;
    vector<Mat> images_test;
    vector<int> labels_train;
    vector<int> labels_test;

    read_csv("train1k.txt",images_train,labels_train);
    read_csv("test1k.txt",images_test,labels_test);

    Mat rawTrainData = rollVectortoMat(images_train);                       
    Mat rawTestData  = rollVectortoMat(images_test);                

    Mat trainLabels = getLabels(labels_train);
    Mat testLabels  = getLabels(labels_test);

    int pca_size = 500;

    Mat trainData(rawTrainData.rows, pca_size,rawTrainData.type());
    Mat testData(rawTestData.rows,pca_size,rawTestData.type());


    pca(rawTrainData,Mat(),CV_PCA_DATA_AS_ROW,pca_size);

    for(int i = 0; i < rawTrainData.rows ; i++)
        pca.project(rawTrainData.row(i),trainData.row(i));

    for(int i = 0; i < rawTestData.rows ; i++)
        pca.project(rawTestData.row(i),testData.row(i));



    Mat layers = Mat(3,1,CV_32SC1);
    int sz = trainData.cols ;

    layers.row(0) = Scalar(sz);
    layers.row(1) = Scalar(1000);
    layers.row(2) = Scalar(20);

    CvANN_MLP mlp;
    CvANN_MLP_TrainParams params;
    CvTermCriteria criteria;

    criteria.max_iter = 1000;
    criteria.epsilon  = 0.00001f;
    criteria.type     = CV_TERMCRIT_ITER | CV_TERMCRIT_EPS;

    params.train_method    = CvANN_MLP_TrainParams::BACKPROP;
    params.bp_dw_scale     = 0.1f;
    params.bp_moment_scale = 0.1f;
    params.term_crit       = criteria;

    mlp.create(layers,CvANN_MLP::SIGMOID_SYM);
    int i = mlp.train(trainData,trainLabels,Mat(),Mat(),params);

    int t = 0, f = 0;

    for(int i = 0; i < testData.rows ; i++)
    {
        Mat response(1,20,CV_32FC1);
        Mat sample = testData.row(i);

        mlp.predict(sample,response);

        float max = -1000000000000.0f;
        int cls = -1;

        for(int j = 0 ; j < 20 ; j++)   
        {
            float value = response.at<float>(0,j);

            if(value > max)
            {
                max = value;
                cls = j + 1;
            }
        }

        if(cls == labels_test[i])
            t++;
        else
            f++;
    }


    return 0;
}

NOTE: I used AT&T 's first 20 class for my dataset.