Setting up MLP for image recognition
Hi! I am new in OpenCV world and neural networks but I have some coding experience in C++/Java.
I created my first ANN MLP and learned it the XOR:
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <iomanip>
using namespace cv;
using namespace ml;
using namespace std;
void print(Mat& mat, int prec)
{
for (int i = 0; i<mat.size().height; i++)
{
cout << "[";
for (int j = 0; j<mat.size().width; j++)
{
cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
if (j != mat.size().width - 1)
cout << ", ";
else
cout << "]" << endl;
}
}
}
int main()
{
const int hiddenLayerSize = 4;
float inputTrainingDataArray[4][2] = {
{ 0.0, 0.0 },
{ 0.0, 1.0 },
{ 1.0, 0.0 },
{ 1.0, 1.0 }
};
Mat inputTrainingData = Mat(4, 2, CV_32F, inputTrainingDataArray);
float outputTrainingDataArray[4][1] = {
{ 0.0 },
{ 1.0 },
{ 1.0 },
{ 0.0 }
};
Mat outputTrainingData = Mat(4, 1, CV_32F, outputTrainingDataArray);
Ptr<ANN_MLP> mlp = ANN_MLP::create();
Mat layersSize = Mat(3, 1, CV_16U);
layersSize.row(0) = Scalar(inputTrainingData.cols);
layersSize.row(1) = Scalar(hiddenLayerSize);
layersSize.row(2) = Scalar(outputTrainingData.cols);
mlp->setLayerSizes(layersSize);
mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM);
TermCriteria termCrit = TermCriteria(
TermCriteria::Type::COUNT + TermCriteria::Type::EPS,
100000000,
0.000000000000000001
);
mlp->setTermCriteria(termCrit);
mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP);
Ptr<TrainData> trainingData = TrainData::create(
inputTrainingData,
SampleTypes::ROW_SAMPLE,
outputTrainingData
);
mlp->train(trainingData
/*, ANN_MLP::TrainFlags::UPDATE_WEIGHTS
+ ANN_MLP::TrainFlags::NO_INPUT_SCALE
+ ANN_MLP::TrainFlags::NO_OUTPUT_SCALE*/
);
for (int i = 0; i < inputTrainingData.rows; i++) {
Mat sample = Mat(1, inputTrainingData.cols, CV_32F, inputTrainingDataArray[i]);
Mat result;
mlp->predict(sample, result);
cout << sample << " -> ";// << result << endl;
print(result, 0);
cout << endl;
}
return 0;
}
It works very well for this simple problem, I also learn this network the 1-10 to binary conversion.
But i need to use MLP for simple image classification - road signs. I write the code for loading training images and preparing matrix for learning but I'm not able to train the network - it "learn" in one second even with 1 000 000 iterations! And it produce garbage results, the same for all inputs!
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <chrono>
#include <memory>
#include <iomanip>
#include <climits>
#include <Windows.h>
using namespace cv;
using namespace ml;
using namespace std;
using namespace chrono;
const int WIDTH_SIZE = 50;
const int HEIGHT_SIZE = (int)(WIDTH_SIZE * sqrt(3)) / 2;
const int IMAGE_DATA_SIZE = WIDTH_SIZE * HEIGHT_SIZE;
void print(Mat& mat, int prec)
{
for (int i = 0; i<mat.size().height; i++)
{
cout << "[ ";
for (int j = 0; j<mat.size().width; j++)
{
cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
if (j != mat.size().width - 1)
cout << ", ";
else
cout << " ]" << endl;
}
}
}
bool loadImage(string imagePath, Mat& outputImage)
{
// load image in grayscale
Mat image = imread(imagePath, IMREAD_GRAYSCALE);
Mat temp;
// check for invalid input
if (image.empty()) {
cout << "Could not open or find the image" << std::endl;
return false;
}
// resize the image
Size size(WIDTH_SIZE, HEIGHT_SIZE ...
for a similar situation, i had values like this:
Thanks, the backprop weight scale was too big - I set it to 0.0001 or 0.001 and I got better results. Then I changed the hidden layer size to 100 and i was able to learn the NN my images ;)
Hi @majkel -- thank you very much for posting your ANN_MLP XOR code. I used it to build my own image classification network: http://answers.opencv.org/question/18...
By the way, I believe that
should be
@sjhalayka, since this are binary flags, it has no effect on result - 0100 + 0010 is 0110, the same as using OR operatorion