Hello everyone !
I used a k-Nearest Neighbors (knn) algorithm and trained it with the MNIST database.
Here is the code for the training:
/**
 * Builds a k-Nearest Neighbors classifier and trains it on the MNIST training set.
 *
 * The image file header is read with readFlippedInteger (magic number, image
 * count, rows, columns); the first 8 bytes of the label file are skipped.
 * Every image becomes one row of numRows * numCols floats (the raw bytes cast
 * to float), paired with its label byte.
 *
 * @return the trained classifier, or an empty Ptr when a data file cannot be
 *         opened, the header describes no data, or a file ends prematurely.
 */
Ptr<ml::KNearest> getKnn()
{
    FILE *fp = fopen("/keep/Repo/USELESS/_sandbox/cpp/learning-cpp/sudoku/assets/train-images-idx3-ubyte", "rb");
    FILE *fp2 = fopen("/keep/Repo/USELESS/_sandbox/cpp/learning-cpp/sudoku/assets/train-labels-idx1-ubyte", "rb");
    if (!fp || !fp2)
    {
        cout << "can't open file" << endl;
        // Bail out instead of reading through a null FILE*.
        if (fp)
        {
            fclose(fp);
        }
        if (fp2)
        {
            fclose(fp2);
        }
        return Ptr<ml::KNearest>();
    }

    readFlippedInteger(fp); // magic number: read only to advance past it
    int numImages = readFlippedInteger(fp);
    int numRows = readFlippedInteger(fp);
    int numCols = readFlippedInteger(fp);
    fseek(fp2, 0x08, SEEK_SET); // skip the 8-byte label file header
    int size = numRows * numCols;
    cout << "size: " << size << endl;
    cout << "rows: " << numRows << endl;
    cout << "cols: " << numCols << endl;

    if (numImages <= 0 || size <= 0)
    {
        cout << "invalid MNIST header" << endl;
        fclose(fp);
        fclose(fp2);
        return Ptr<ml::KNearest>();
    }

    Mat_<float> trainFeatures(numImages, size);
    Mat_<int> trainLabels(1, numImages);
    vector<BYTE> temp(size); // owns the per-image buffer; released automatically
    BYTE tempClass = 0;
    bool complete = true;
    for (int i = 0; i < numImages; i++)
    {
        // A short read means the file is truncated; do not train on garbage.
        if (fread(&temp[0], size, 1, fp) != 1 ||
            fread(&tempClass, sizeof(BYTE), 1, fp2) != 1)
        {
            complete = false;
            break;
        }
        trainLabels[0][i] = (int)tempClass;
        for (int k = 0; k < size; k++)
        {
            trainFeatures[i][k] = (float)temp[k];
        }
    }
    fclose(fp);
    fclose(fp2);

    if (!complete)
    {
        cout << "can't read file" << endl;
        return Ptr<ml::KNearest>();
    }

    Ptr<ml::KNearest> knn(ml::KNearest::create());
    knn->train(trainFeatures, ml::ROW_SAMPLE, trainLabels);
    return knn;
}
When I test the algorithm with the 10k-image test file that MNIST provides, I get: Accuracy: 96.910000, which is good news :)
:)
The code to test the trained knn is here:
/**
 * Measures the accuracy of a trained classifier on the MNIST 10k test set.
 *
 * Each test image is read, converted to a row of floats and classified with
 * findNearest (K = 1). The predicted label is compared with the label read
 * from the label file, and the percentage of matches is printed at the end.
 *
 * @param knn   trained classifier; nothing is tested when it is empty.
 * @param debug when true, prints the response and distance for every image
 *              and displays it via createMatFromMNIST / showImage.
 */
void testKnn(Ptr<ml::KNearest> knn, bool debug)
{
    if (knn.empty())
    {
        cout << "knn is not trained" << endl;
        return;
    }

    FILE *fp = fopen("/keep/Repo/USELESS/_sandbox/cpp/learning-cpp/sudoku/assets/t10k-images-idx3-ubyte", "rb");
    FILE *fp2 = fopen("/keep/Repo/USELESS/_sandbox/cpp/learning-cpp/sudoku/assets/t10k-labels-idx1-ubyte", "rb");
    if (!fp || !fp2)
    {
        cout << "can't open file" << endl;
        if (fp)
        {
            fclose(fp);
        }
        if (fp2)
        {
            fclose(fp2);
        }
        return;
    }

    readFlippedInteger(fp); // magic number: read only to advance past it
    int numImages = readFlippedInteger(fp);
    int numRows = readFlippedInteger(fp);
    int numCols = readFlippedInteger(fp);
    fseek(fp2, 0x08, SEEK_SET); // skip the 8-byte label file header
    int size = numRows * numCols;
    if (numImages <= 0 || size <= 0)
    {
        cout << "invalid MNIST header" << endl;
        fclose(fp);
        fclose(fp2);
        return;
    }

    Mat_<float> testFeatures(numImages, size);
    Mat_<int> expectedLabels(1, numImages);
    vector<BYTE> temp(size); // owns the per-image buffer; released automatically
    BYTE tempClass = 0;
    int K = 1;
    Mat response, dist, m;
    int totalCorrect = 0;
    int processed = 0; // images actually read and classified
    for (int i = 0; i < numImages; i++)
    {
        if (i % 1000 == 0 && i != 0)
        {
            cout << i << endl; // progress indicator
        }
        // A short read means the file is truncated; stop instead of classifying stale data.
        if (fread(&temp[0], size, 1, fp) != 1 ||
            fread(&tempClass, sizeof(BYTE), 1, fp2) != 1)
        {
            cout << "can't read file" << endl;
            break;
        }
        expectedLabels[0][i] = (int)tempClass;
        for (int k = 0; k < size; k++)
        {
            testFeatures[i][k] = (float)temp[k];
        }
        m = testFeatures.row(i);
        knn->findNearest(m, K, noArray(), response, dist);
        if (debug)
        {
            cout << "response: " << response << endl;
            cout << "dist: " << dist << endl;
            // Visual check that createMatFromMNIST / createMatToMNIST round-trip correctly.
            Mat m2 = createMatFromMNIST(m);
            showImage(m2);
            // Mat m3 = createMatToMNIST(m2);
            // showImage(m3);
        }
        if (expectedLabels[0][i] == response.at<float>(0))
        {
            totalCorrect++;
        }
        processed++;
    }
    fclose(fp);
    fclose(fp2);

    // Guard the division: report accuracy only over images that were classified.
    if (processed > 0)
    {
        printf("Accuracy: %f ", (double)totalCorrect * 100 / (double)processed);
    }
}
By the way, you can test the knn I have implemented in my project here: (see the actions part) https://bitbucket.org/BenNG/sudoku-recognizer
But when I run the algorithm on my own data, it behaves badly.
What data do I give to the algorithm?
To answer that, I will briefly present my project.
My project is a sudoku grabber. So on a picture that holds a sudoku, I'm able to find the sudoku and extract it. Then I'm able to extract every cell in the puzzle.
Each cell is preprocessed before I send it to the knn.
By the way, you can also see the extraction of the puzzle and cells here
For the last part, which is sending the extracted number to the knn, I:
- clean and extract only the number
- resize the image to 20x20
- copy this 20x20 in a 28x28 black (adding border)
- center the 20x20 image in the 28x28 one (I still need to use the center of mass rather than the middle of the picture, as described in the MNIST description)
here is the code:
// Train the classifier once, then classify every non-empty cell of the puzzle.
// fin, fin2, fin3, K, response, dist and p are declared outside this snippet.
Ptr<ml::KNearest> knn = getKnn();
string fullName = p.string();
Mat raw = imread(fullName, CV_LOAD_IMAGE_GRAYSCALE);
Mat sudoku = extractPuzzle(raw);
for (int k = 0; k < 81; k++)
{
    Mat cell = extractCell(sudoku, k);
    Mat roi = extractNumber(cell);
    if (roi.empty())
    {
        continue; // no digit found in this cell
    }

    adaptiveThreshold(roi, fin, 255, ADAPTIVE_THRESH_MEAN_C, THRESH_BINARY_INV, 3, 1);
    fin2 = removeTinyVolume(fin, 90, Scalar(0, 0, 0));

    // v holds left, top, width, height of the biggest component in its first four entries.
    vector<double> v = findBiggestComponent(fin2);
    if (v.size() < 4)
    {
        continue;
    }
    Rect rect(static_cast<int>(v[0]), static_cast<int>(v[1]),
              static_cast<int>(v[2]), static_cast<int>(v[3]));
    fin3 = fin2(rect);
    Mat normalized = normalizeSize(fin3);

    const int size = 28;
    if (normalized.empty() || normalized.cols > size || normalized.rows > size)
    {
        continue; // the digit would not fit inside the 28x28 canvas
    }

    // Center the digit geometrically. The previous offset was mid - 0 / 0.0,
    // i.e. NaN converted to an int Rect coordinate.
    // TODO: center on the center of mass, as the MNIST description specifies.
    const int offsetX = (size - normalized.cols) / 2;
    const int offsetY = (size - normalized.rows) / 2;
    Mat output = Mat::zeros(size, size, CV_32F);
    Mat target = output(Rect(offsetX, offsetY, normalized.cols, normalized.rows));
    // convertTo writes into the ROI in place because size and type already match;
    // it also copes with an 8-bit source (assumes normalized is single-channel -- TODO confirm).
    normalized.convertTo(target, CV_32F);

    Mat test = createMatToMNIST(output);
    knn->findNearest(test, K, noArray(), response, dist);
    cout << "response: " << response << endl;
    cout << "dist: " << dist << endl;
}
I transform the 28x28 image into a 1x784 Mat and give that to the knn, and most of the time the answer is
response: [5]
dist: [-nan]
but when I use a Mat from MNIST the result is accurate
response: [7]
dist: [457766]
left is mine
right is mnist
Do you have any ideas ?
Edit: 5 minutes after publishing this post I found something:
this commit fixes the "no result at all" problem, but the classifier is still not accurate enough.