Setting up MLP for image recognition

asked 2016-05-28 10:04:27 -0500

majkel gravatar image

Hi! I am new in OpenCV world and neural networks but I have some coding experience in C++/Java.


I created my first ANN MLP and learned it the XOR:

#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>

#include <iostream>
#include <iomanip>

using namespace cv;
using namespace ml;
using namespace std;

void print(Mat& mat, int prec)
{
    for (int i = 0; i<mat.size().height; i++)
    {
        cout << "[";
        for (int j = 0; j<mat.size().width; j++)
        {
            cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
            if (j != mat.size().width - 1)
                cout << ", ";
            else
                cout << "]" << endl;
        }
    }
}

int main()
{
    const int hiddenLayerSize = 4;
    float inputTrainingDataArray[4][2] = {
        { 0.0, 0.0 },
        { 0.0, 1.0 },
        { 1.0, 0.0 },
        { 1.0, 1.0 }
    };
    Mat inputTrainingData = Mat(4, 2, CV_32F, inputTrainingDataArray);

    float outputTrainingDataArray[4][1] = {
        { 0.0 },
        { 1.0 },
        { 1.0 },
        { 0.0 }
    };
    Mat outputTrainingData = Mat(4, 1, CV_32F, outputTrainingDataArray);

    Ptr<ANN_MLP> mlp = ANN_MLP::create();

    Mat layersSize = Mat(3, 1, CV_16U);
    layersSize.row(0) = Scalar(inputTrainingData.cols);
    layersSize.row(1) = Scalar(hiddenLayerSize);
    layersSize.row(2) = Scalar(outputTrainingData.cols);
    mlp->setLayerSizes(layersSize);

    mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM);

    TermCriteria termCrit = TermCriteria(
        TermCriteria::Type::COUNT + TermCriteria::Type::EPS,
        100000000,
        0.000000000000000001
    );
    mlp->setTermCriteria(termCrit);

    mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP);

    Ptr<TrainData> trainingData = TrainData::create(
        inputTrainingData,
        SampleTypes::ROW_SAMPLE,
        outputTrainingData
    );

    mlp->train(trainingData
        /*, ANN_MLP::TrainFlags::UPDATE_WEIGHTS
        + ANN_MLP::TrainFlags::NO_INPUT_SCALE
        + ANN_MLP::TrainFlags::NO_OUTPUT_SCALE*/
    );

    for (int i = 0; i < inputTrainingData.rows; i++) {
        Mat sample = Mat(1, inputTrainingData.cols, CV_32F, inputTrainingDataArray[i]);
        Mat result;
        mlp->predict(sample, result);
        cout << sample << " -> ";// << result << endl;
        print(result, 0);
        cout << endl;
    }

    return 0;
}

It works very well for this simple problem, I also learn this network the 1-10 to binary conversion.


But i need to use MLP for simple image classification - road signs. I write the code for loading training images and preparing matrix for learning but I'm not able to train the network - it "learn" in one second even with 1 000 000 iterations! And it produce garbage results, the same for all inputs!

#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>

#include <iostream>
#include <chrono>
#include <memory>
#include <iomanip>
#include <climits>

#include <Windows.h>

using namespace cv;
using namespace ml;
using namespace std;
using namespace chrono;

const int WIDTH_SIZE = 50;
const int HEIGHT_SIZE = (int)(WIDTH_SIZE * sqrt(3)) / 2;
const int IMAGE_DATA_SIZE = WIDTH_SIZE * HEIGHT_SIZE;

void print(Mat& mat, int prec)
{
    for (int i = 0; i<mat.size().height; i++)
    {
        cout << "[ ";
        for (int j = 0; j<mat.size().width; j++)
        {
            cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
            if (j != mat.size().width - 1)
                cout << ", ";
            else
                cout << " ]" << endl;
        }
    }
}

bool loadImage(string imagePath, Mat& outputImage)
{
    // load image in grayscale
    Mat image = imread(imagePath, IMREAD_GRAYSCALE);
    Mat temp;

    // check for invalid input
    if (image.empty()) {
        cout << "Could not open or find the image" << std::endl;
        return false;
    }

    // resize the image
    Size size(WIDTH_SIZE, HEIGHT_SIZE ...
(more)
edit retag flag offensive close merge delete

Comments

1

for a similar situation, i had values like this:

    ann->setActivationFunction(ml::ANN_MLP::SIGMOID_SYM,0,0);
    ann->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 900, 0.0001));
    ann->setTrainMethod(ml::ANN_MLP::BACKPROP, 0.0001);
berak gravatar imageberak ( 2016-05-29 04:17:59 -0500 )edit

Thanks, the backprop weight scale was too big - I set it to 0.0001 or 0.001 and I got better results. Then I changed the hidden layer size to 100 and i was able to learn the NN my images ;)

majkel gravatar imagemajkel ( 2016-05-29 09:41:33 -0500 )edit

Hi @majkel -- thank you very much for posting your ANN_MLP XOR code. I used it to build my own image classification network: http://answers.opencv.org/question/18...

By the way, I believe that

ANN_MLP::TrainFlags::UPDATE_WEIGHTS
+ ANN_MLP::TrainFlags::NO_INPUT_SCALE
+ ANN_MLP::TrainFlags::NO_OUTPUT_SCALE

should be

ANN_MLP::TrainFlags::UPDATE_WEIGHTS
| ANN_MLP::TrainFlags::NO_INPUT_SCALE
| ANN_MLP::TrainFlags::NO_OUTPUT_SCALE
sjhalayka gravatar imagesjhalayka ( 2017-12-15 20:01:39 -0500 )edit

@sjhalayka, since this are binary flags, it has no effect on result - 0100 + 0010 is 0110, the same as using OR operatorion

majkel gravatar imagemajkel ( 2017-12-16 04:21:38 -0500 )edit