Hi all !
I think I am more than close to finish my project which is a sudoku grabber After using tesseract, I realised that it was to slow and not enough accurate so I try an other solution I try to use an Artificial Neural Network and more precisely a Multi-Layer Perceptron
I have 126 training images which represent number. those images have normalized size (25,25)
I have a feature function which extract some information about the cell image:
Mat features(Mat in, int sizeData) {
int HORIZONTAL = 1;
int VERTICAL = 0;
//Histogram features
Mat vhist = ProjectedHistogram(in, VERTICAL);
Mat hhist = ProjectedHistogram(in, HORIZONTAL);
//Low data feature
Mat lowData;
resize(in, lowData, Size(sizeData, sizeData));
//Last 10 is the number of moments components
int numCols = vhist.cols + hhist.cols + lowData.cols * lowData.cols;
Mat out = Mat::zeros(1, numCols, CV_32F);
//Asign values to feature
int j = 0;
for (int i = 0; i < vhist.cols; i++) {
out.at<float>(j) = vhist.at<float>(i);
j++;
}
for (int i = 0; i < hhist.cols; i++) {
out.at<float>(j) = hhist.at<float>(i);
j++;
}
for (int x = 0; x < lowData.cols; x++) {
for (int y = 0; y < lowData.rows; y++) {
out.at<float>(j) = (float) lowData.at < unsigned
char > (x, y);
j++;
}
}
return out;
}
I also have a function called createDataForTraining which take all my input picture and transform these to a matrix of features called TrainingData
. so in TrainingData
each row represents a image and each row has 275 cols. For now TrainingClasses are (1, 275)
void createDataForTraining() {
char *path = "./training/";
Mat classes;
Mat trainingDataf5;
Mat trainingDataf10;
Mat trainingDataf15;
Mat trainingDataf20;
vector<int> trainingLabels;
for (int i = 0; i < numCharacters; i++) {
int numFiles = numFilesChars[i];
for (int j = 1; j <= numFiles; j++) {
// cout << "Character "<< strCharacters[i] << " file: " << j << "\n";
stringstream ss(stringstream::in | stringstream::out);
ss << path << strCharacters[i] << "/" << j << ".jpg";
string filename = ss.str();
// cout << filename << endl;
Mat img = imread(ss.str(), 0);
Mat f5 = features(img, 5);
Mat f10 = features(img, 10);
Mat f15 = features(img, 15);
Mat f20 = features(img, 20);
trainingDataf5.push_back(f5);
trainingDataf10.push_back(f10);
trainingDataf15.push_back(f15);
trainingDataf20.push_back(f20);
trainingLabels.push_back(i);
}
}
trainingDataf5.convertTo(trainingDataf5, CV_32F);
trainingDataf10.convertTo(trainingDataf10, CV_32F);
trainingDataf15.convertTo(trainingDataf15, CV_32F);
trainingDataf20.convertTo(trainingDataf20, CV_32F);
Mat(trainingLabels).copyTo(classes);
FileStorage fs("OCR.xml", FileStorage::WRITE);
fs << "TrainingDataF5" << trainingDataf5;
fs << "TrainingDataF10" << trainingDataf10;
fs << "TrainingDataF15" << trainingDataf15;
fs << "TrainingDataF20" << trainingDataf20;
fs << "classes" << classes;
fs.release();
}
I modify TrainingClasses because It has to be a matrix with (TrainingData.rows, number of classes
Mat TrainingClasses;
TrainingClasses = Mat::zeros(TrainingData.rows, numCharacters, CV_32F);
for (int i = 0; i < TrainingClasses.rows; i++) {
for (int k = 0; k < TrainingClasses.cols; k++) {
//If class of data i is same than a k class
if (k == Classes.at<int>(i))
TrainingClasses.at<float>(i, k) = 1.0f;
else
TrainingClasses.at<float>(i, k) = 0.0f;
}
}
And I'm blocked here with this code :
mlp = ANN_MLP::create();
Mat layersSize = Mat(3, 1, CV_16U);
layersSize.row(0) = Scalar(TrainingData.cols);
layersSize.row(1) = Scalar(hiddenLayerSize);
layersSize.row(2) = Scalar(Classes.cols);
mlp->setLayerSizes(layersSize);
mlp->setActivationFunction(ANN_MLP::SIGMOID_SYM);
TermCriteria termCrit = TermCriteria(
TermCriteria::COUNT + TermCriteria::EPS,
100000000,
0.000000000000000001
);
mlp->setTermCriteria(termCrit);
mlp->setTrainMethod(ANN_MLP::BACKPROP);
Ptr<TrainData> trainingData = TrainData::create(TrainingData, ROW_SAMPLE, TrainingClasses);
I have this error but I'm not able to fix it
Bad argument (output training data should be a floating-point matrix with the number of rows equal to the number of training samples and the number of columns equal to the size of last (output) layer)
Any ideas ?
Thank you !