Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

number recognition using multi layer perceptron

Hi all !

I think I am more than close to finish my project which is a sudoku grabber After using tesseract, I realised that it was to slow and not enough accurate so I try an other solution I try to use an Artificial Neural Network and more precisely a Multi-Layer Perceptron

I have 126 training images which represent number. those images have normalized size (25,25)

I have a feature function which extract some information about the cell image:

Mat features(Mat in, int sizeData) {
int HORIZONTAL = 1;
int VERTICAL = 0;

//Histogram features
Mat vhist = ProjectedHistogram(in, VERTICAL);
Mat hhist = ProjectedHistogram(in, HORIZONTAL);

//Low data feature
Mat lowData;
resize(in, lowData, Size(sizeData, sizeData));

//Last 10 is the number of moments components
int numCols = vhist.cols + hhist.cols + lowData.cols * lowData.cols;

Mat out = Mat::zeros(1, numCols, CV_32F);
//Asign values to feature
int j = 0;
for (int i = 0; i < vhist.cols; i++) {
    out.at<float>(j) = vhist.at<float>(i);
    j++;
}
for (int i = 0; i < hhist.cols; i++) {
    out.at<float>(j) = hhist.at<float>(i);
    j++;
}
for (int x = 0; x < lowData.cols; x++) {
    for (int y = 0; y < lowData.rows; y++) {
        out.at<float>(j) = (float) lowData.at < unsigned
        char > (x, y);
        j++;
    }
}
return out;

}

I also have a function called createDataForTraining which take all my input picture and transform these to a matrix of features called TrainingData. so in TrainingData each row represents a image and each row has 275 cols. For now TrainingClasses are (1, 275)

void createDataForTraining() {

char *path = "./training/";

Mat classes;
Mat trainingDataf5;
Mat trainingDataf10;
Mat trainingDataf15;
Mat trainingDataf20;

vector<int> trainingLabels;
for (int i = 0; i < numCharacters; i++) {
    int numFiles = numFilesChars[i];
    for (int j = 1; j <= numFiles; j++) {
        // cout << "Character "<< strCharacters[i] << " file: " << j << "\n";
        stringstream ss(stringstream::in | stringstream::out);
        ss << path << strCharacters[i] << "/" << j << ".jpg";
        string filename = ss.str();
        // cout << filename << endl;
        Mat img = imread(ss.str(), 0);
        Mat f5 = features(img, 5);
        Mat f10 = features(img, 10);
        Mat f15 = features(img, 15);
        Mat f20 = features(img, 20);

        trainingDataf5.push_back(f5);
        trainingDataf10.push_back(f10);
        trainingDataf15.push_back(f15);
        trainingDataf20.push_back(f20);
        trainingLabels.push_back(i);
    }
}

trainingDataf5.convertTo(trainingDataf5, CV_32F);
trainingDataf10.convertTo(trainingDataf10, CV_32F);
trainingDataf15.convertTo(trainingDataf15, CV_32F);
trainingDataf20.convertTo(trainingDataf20, CV_32F);
Mat(trainingLabels).copyTo(classes);


FileStorage fs("OCR.xml", FileStorage::WRITE);
fs << "TrainingDataF5" << trainingDataf5;
fs << "TrainingDataF10" << trainingDataf10;
fs << "TrainingDataF15" << trainingDataf15;
fs << "TrainingDataF20" << trainingDataf20;
fs << "classes" << classes;
fs.release();

}

I modify TrainingClasses because It has to be a matrix with (TrainingData.rows, number of classes

Mat TrainingClasses;
TrainingClasses = Mat::zeros(TrainingData.rows, numCharacters, CV_32F);
for (int i = 0; i < TrainingClasses.rows; i++) {
    for (int k = 0; k < TrainingClasses.cols; k++) {
        //If class of data i is same than a k class
        if (k == Classes.at<int>(i))
            TrainingClasses.at<float>(i, k) = 1.0f;
        else
            TrainingClasses.at<float>(i, k) = 0.0f;
    }
}

And I'm blocked here with this code :

            mlp = ANN_MLP::create();
            Mat layersSize = Mat(3, 1, CV_16U);
            layersSize.row(0) = Scalar(TrainingData.cols);
            layersSize.row(1) = Scalar(hiddenLayerSize);
            layersSize.row(2) = Scalar(Classes.cols);
            mlp->setLayerSizes(layersSize);
            mlp->setActivationFunction(ANN_MLP::SIGMOID_SYM);
            TermCriteria termCrit = TermCriteria(
                    TermCriteria::COUNT + TermCriteria::EPS,
                    100000000,
                    0.000000000000000001
            );
            mlp->setTermCriteria(termCrit);
            mlp->setTrainMethod(ANN_MLP::BACKPROP);
            Ptr<TrainData> trainingData = TrainData::create(TrainingData, ROW_SAMPLE, TrainingClasses);

I have this error but I'm not able to fix it

Bad argument (output training data should be a floating-point matrix with the number of rows equal to the number of training samples and the number of columns equal to the size of last (output) layer)

Any ideas ?

Thank you !

number recognition using multi layer perceptron

Hi all !

I think I am more than close to finish my project which is a sudoku grabber After using tesseract, I realised that it was to slow and not enough accurate so I try an other solution I try to use an Artificial Neural Network and more precisely a Multi-Layer Perceptron

I'm stuck with this error:

Bad argument (output training data should be a floating-point matrix with the number of rows equal to the number of training samples and the number of columns equal to the size of last (output) layer)

and this error is related to :

 Ptr<TrainData> trainingData = TrainData::create(TrainingData, ROW_SAMPLE, TrainingClasses);

Here is some details:

I have 126 training images which represent number. those images have normalized size (25,25)

I have a feature function which extract some information about the cell image:

Mat features(Mat in, int sizeData) {
int HORIZONTAL = 1;
int VERTICAL = 0;

//Histogram features
Mat vhist = ProjectedHistogram(in, VERTICAL);
Mat hhist = ProjectedHistogram(in, HORIZONTAL);

//Low data feature
Mat lowData;
resize(in, lowData, Size(sizeData, sizeData));

//Last 10 is the number of moments components
int numCols = vhist.cols + hhist.cols + lowData.cols * lowData.cols;

Mat out = Mat::zeros(1, numCols, CV_32F);
//Asign values to feature
int j = 0;
for (int i = 0; i < vhist.cols; i++) {
    out.at<float>(j) = vhist.at<float>(i);
    j++;
}
for (int i = 0; i < hhist.cols; i++) {
    out.at<float>(j) = hhist.at<float>(i);
    j++;
}
for (int x = 0; x < lowData.cols; x++) {
    for (int y = 0; y < lowData.rows; y++) {
        out.at<float>(j) = (float) lowData.at < unsigned
        char > (x, y);
        j++;
    }
}
return out;
}

 Mat ProjectedHistogram(Mat img, int t) {
int sz = (t) ? img.rows : img.cols;
Mat mhist = Mat::zeros(1, sz, CV_32F);
for (int j = 0; j < sz; j++) {
    Mat data = (t) ? img.row(j) : img.col(j);
    mhist.at<float>(j) = countNonZero(data);
}
//Normalize histogram
double min, max;
minMaxLoc(mhist, &min, &max);
if (max > 0)
    mhist.convertTo(mhist, -1, 1.0f / max, 0);
return mhist;
}

}

I also have a function called createDataForTraining which take all my input picture and transform these to a matrix of features called TrainingData. so in TrainingData each row represents a image and each row has 275 cols. For now TrainingClasses are (1, 275)

void createDataForTraining() {

char *path = "./training/";

Mat classes;
Mat trainingDataf5;
Mat trainingDataf10;
Mat trainingDataf15;
Mat trainingDataf20;

vector<int> trainingLabels;
for (int i = 0; i < numCharacters; i++) {
    int numFiles = numFilesChars[i];
    for (int j = 1; j <= numFiles; j++) {
        // cout << "Character "<< strCharacters[i] << " file: " << j << "\n";
        stringstream ss(stringstream::in | stringstream::out);
        ss << path << strCharacters[i] << "/" << j << ".jpg";
        string filename = ss.str();
        // cout << filename << endl;
        Mat img = imread(ss.str(), 0);
        Mat f5 = features(img, 5);
        Mat f10 = features(img, 10);
        Mat f15 = features(img, 15);
        Mat f20 = features(img, 20);

        trainingDataf5.push_back(f5);
        trainingDataf10.push_back(f10);
        trainingDataf15.push_back(f15);
        trainingDataf20.push_back(f20);
        trainingLabels.push_back(i);
    }
}

trainingDataf5.convertTo(trainingDataf5, CV_32F);
trainingDataf10.convertTo(trainingDataf10, CV_32F);
trainingDataf15.convertTo(trainingDataf15, CV_32F);
trainingDataf20.convertTo(trainingDataf20, CV_32F);
Mat(trainingLabels).copyTo(classes);


FileStorage fs("OCR.xml", FileStorage::WRITE);
fs << "TrainingDataF5" << trainingDataf5;
fs << "TrainingDataF10" << trainingDataf10;
fs << "TrainingDataF15" << trainingDataf15;
fs << "TrainingDataF20" << trainingDataf20;
fs << "classes" << classes;
fs.release();

}

I modify TrainingClasses because It has to be a matrix with (TrainingData.rows, number of classes

Mat TrainingClasses;
TrainingClasses = Mat::zeros(TrainingData.rows, numCharacters, CV_32F);
for (int i = 0; i < TrainingClasses.rows; i++) {
    for (int k = 0; k < TrainingClasses.cols; k++) {
        //If class of data i is same than a k class
        if (k == Classes.at<int>(i))
            TrainingClasses.at<float>(i, k) = 1.0f;
        else
            TrainingClasses.at<float>(i, k) = 0.0f;
    }
}

And I'm blocked here with this code :

            mlp = ANN_MLP::create();
            Mat layersSize = Mat(3, 1, CV_16U);
            layersSize.row(0) = Scalar(TrainingData.cols);
            layersSize.row(1) = Scalar(hiddenLayerSize);
            layersSize.row(2) = Scalar(Classes.cols);
            mlp->setLayerSizes(layersSize);
            mlp->setActivationFunction(ANN_MLP::SIGMOID_SYM);
            TermCriteria termCrit = TermCriteria(
                    TermCriteria::COUNT + TermCriteria::EPS,
                    100000000,
                    0.000000000000000001
            );
            mlp->setTermCriteria(termCrit);
            mlp->setTrainMethod(ANN_MLP::BACKPROP);
            Ptr<TrainData> trainingData = TrainData::create(TrainingData, ROW_SAMPLE, TrainingClasses);

I have this error but I'm not able to fix it

Bad argument (output training data should be a floating-point matrix with the number of rows equal to the number of training samples and the number of columns equal to the size of last (output) layer)

Any ideas ?

Thank you !

click to hide/show revision 3
No.3 Revision

number recognition using multi layer perceptron

Hi all !

I think I am more than close to finish my project which is a sudoku grabber After using tesseract, I realised that it was to slow and not enough accurate so I try an other solution I try to use an Artificial Neural Network and more precisely a Multi-Layer Perceptron

I'm stuck with this error:

Bad argument (output training data should be a floating-point matrix with the number of rows equal to the number of training samples and the number of columns equal to the size of last (output) layer)

and this error is related to :

 Ptr<TrainData> trainingData = TrainData::create(TrainingData, ROW_SAMPLE, TrainingClasses);

Here is some details:

I have 126 training images which represent number. those images have normalized size (25,25)

I have a feature function which extract some information about the cell image:

Mat features(Mat in, int sizeData) {
int HORIZONTAL = 1;
int VERTICAL = 0;

//Histogram features
Mat vhist = ProjectedHistogram(in, VERTICAL);
Mat hhist = ProjectedHistogram(in, HORIZONTAL);

//Low data feature
Mat lowData;
resize(in, lowData, Size(sizeData, sizeData));

//Last 10 is the number of moments components
int numCols = vhist.cols + hhist.cols + lowData.cols * lowData.cols;

Mat out = Mat::zeros(1, numCols, CV_32F);
//Asign values to feature
int j = 0;
for (int i = 0; i < vhist.cols; i++) {
    out.at<float>(j) = vhist.at<float>(i);
    j++;
}
for (int i = 0; i < hhist.cols; i++) {
    out.at<float>(j) = hhist.at<float>(i);
    j++;
}
for (int x = 0; x < lowData.cols; x++) {
    for (int y = 0; y < lowData.rows; y++) {
        out.at<float>(j) = (float) lowData.at < unsigned
        char > (x, y);
        j++;
    }
}
return out;
}

 Mat ProjectedHistogram(Mat img, int t) {
int sz = (t) ? img.rows : img.cols;
Mat mhist = Mat::zeros(1, sz, CV_32F);
for (int j = 0; j < sz; j++) {
    Mat data = (t) ? img.row(j) : img.col(j);
    mhist.at<float>(j) = countNonZero(data);
}
//Normalize histogram
double min, max;
minMaxLoc(mhist, &min, &max);
if (max > 0)
    mhist.convertTo(mhist, -1, 1.0f / max, 0);
return mhist;
}

I also have a function called createDataForTraining which take all my input picture and transform these to a matrix of features called TrainingData. so in TrainingData each row represents a image and each row has 275 cols. For now TrainingClasses are (1, 275)

void createDataForTraining() {

char *path = "./training/";

Mat classes;
Mat trainingDataf5;
Mat trainingDataf10;
Mat trainingDataf15;
Mat trainingDataf20;

vector<int> trainingLabels;
for (int i = 0; i < numCharacters; i++) {
    int numFiles = numFilesChars[i];
    for (int j = 1; j <= numFiles; j++) {
        // cout << "Character "<< strCharacters[i] << " file: " << j << "\n";
        stringstream ss(stringstream::in | stringstream::out);
        ss << path << strCharacters[i] << "/" << j << ".jpg";
        string filename = ss.str();
        // cout << filename << endl;
        Mat img = imread(ss.str(), 0);
        Mat f5 = features(img, 5);
        Mat f10 = features(img, 10);
        Mat f15 = features(img, 15);
        Mat f20 = features(img, 20);

        trainingDataf5.push_back(f5);
        trainingDataf10.push_back(f10);
        trainingDataf15.push_back(f15);
        trainingDataf20.push_back(f20);
        trainingLabels.push_back(i);
    }
}

trainingDataf5.convertTo(trainingDataf5, CV_32F);
trainingDataf10.convertTo(trainingDataf10, CV_32F);
trainingDataf15.convertTo(trainingDataf15, CV_32F);
trainingDataf20.convertTo(trainingDataf20, CV_32F);
Mat(trainingLabels).copyTo(classes);


FileStorage fs("OCR.xml", FileStorage::WRITE);
fs << "TrainingDataF5" << trainingDataf5;
fs << "TrainingDataF10" << trainingDataf10;
fs << "TrainingDataF15" << trainingDataf15;
fs << "TrainingDataF20" << trainingDataf20;
fs << "classes" << classes;
fs.release();

}

I modify TrainingClasses because It has to be a matrix with (TrainingData.rows, number of classes

Mat TrainingClasses;
TrainingClasses = Mat::zeros(TrainingData.rows, numCharacters, CV_32F);
for (int i = 0; i < TrainingClasses.rows; i++) {
    for (int k = 0; k < TrainingClasses.cols; k++) {
        //If class of data i is same than a k class
        if (k == Classes.at<int>(i))
            TrainingClasses.at<float>(i, k) = 1.0f;
        else
            TrainingClasses.at<float>(i, k) = 0.0f;
    }
}

And I'm blocked here with this code :

            mlp = ANN_MLP::create();
            Mat layersSize = Mat(3, 1, CV_16U);
            layersSize.row(0) = Scalar(TrainingData.cols);
            layersSize.row(1) = Scalar(hiddenLayerSize);
            layersSize.row(2) = Scalar(Classes.cols);
            mlp->setLayerSizes(layersSize);
            mlp->setActivationFunction(ANN_MLP::SIGMOID_SYM);
            TermCriteria termCrit = TermCriteria(
                    TermCriteria::COUNT + TermCriteria::EPS,
                    100000000,
                    0.000000000000000001
            );
            mlp->setTermCriteria(termCrit);
            mlp->setTrainMethod(ANN_MLP::BACKPROP);
            Ptr<TrainData> trainingData = TrainData::create(TrainingData, ROW_SAMPLE, TrainingClasses);

Any ideas ?

Thank you !