I tried to train an OpenCV SVM on the MNIST dataset and got weird results, i.e. a test accuracy of only about 10%. Any idea what went wrong? Thanks in advance.
Here are the parameters I used:
// Build an untrained SVM and configure it before training.
Ptr<SVM> model = SVM::create();

// Kernel choice and its width parameter.
// NOTE(review): gamma = 0.01 with an RBF kernel presumably expects features
// scaled to a small range (e.g. [0, 1]); with raw 0-255 pixel values the
// kernel response would be ~0 for almost every pair -- confirm how tdata is scaled.
model->setKernel(SVM::RBF);
model->setGamma(0.01);

// Multi-class C-support vector classification with penalty C = 10.
model->setType(SVM::C_SVC);
model->setC(10);
Training:
// Fit the configured SVM on tdata. tdata is built outside this snippet
// (presumably from the 60000 MNIST training rows -- confirm). The value
// returned by train() is not checked here.
model->train(tdata);
Testing: (from letter_recog.cpp sample)
static void test_and_save_classifier(const Ptr<StatModel>& model,
const Mat& data, const Mat& responses,
int ntrain_samples, int rdelta,
const string& filename_to_save)
{
int i, nsamples_all = data.rows;
double train_hr = 0, test_hr = 0;
// compute prediction error on
// train data[0 , ..., ntrain_samples-1]; and
// test data[0 , ..., nsamples_all-1]
before = static_cast<double>(getTickCount());
for( i = 0; i < nsamples_all; i++ )
{
Mat sample = data.row(i);
// The method is used to predict the response for a new sample.
// In case of a classification, the method returns the class label.
float r = model->predict( sample ); /// sample is the row feature vector
// Tally correct classifications
// +1 if prediction is correct
// +0 if prediction is wrong
r = std::abs(r + rdelta - responses.at<int>(i)) <= FLT_EPSILON ? 1.f : 0.f;
if( i < ntrain_samples )
train_hr += r;
else
test_hr += r;
}
after = static_cast<double>(getTickCount());
duration_in_ms = 1000.0*(after - before)/getTickFrequency();
cout << "Prediction for all data completed after "<< duration_in_ms <<" ms...\n";
cout << "Average prediction time per sample is "<< duration_in_ms/nsamples_all <<" ms.\n";
test_hr /= nsamples_all - ntrain_samples;
train_hr = ntrain_samples > 0 ? train_hr/ntrain_samples : 1.;
/// Note: 0 training samples here will give 100 % training error
printf( "Recognition rate: train = %.2f%%, test = %.2f%%\n",
train_hr*100., test_hr*100. );
if( !filename_to_save.empty() )
{
model->save( filename_to_save );
}
}
Test run:
trainVecLabels.size() = 60000
trainVecLabels[0] = 5
testData.size() = 10000
testVecLabels.size() = 10000
testVecLabels[0] = 7
data.size() = [784 x 70000]
responses.size() = [1 x 70000]
Training the classifier ...
Training completed after 11.1005 mins...
Testing and saving ...
Prediction for all data completed after 443715 ms...
Average prediction time per sample is 6.33879 ms.
Recognition rate: train = 24.92%, test = 10.09%