Revision history [back]

Training SVM for image recognition with BOW: error on prediction

Hello!

Context: I am currently attempting to train a SVM to recognize a specific building in a scene vs. different buildings. After attempting simpler approaches like direct image feature matching + homography and such, I want to attempt something a bit more flexible and/or powerful. Hence, I have set my gaze upon BOW+SVM.

Problem: As far as I am aware, the training phase goes without a hitch. However, when I reuse (either straight in code or after reloading the saved text format) my trained vocabulary and SVM to attempt a prediction on a new image, I get an out of memory error on the svm.predict(descriptors) function:

Unhandled exception at 0x7594C41F in MachineLearningTrainerBOW.exe: Microsoft C++ exception: cv::Exception at memory location 0x0037F288.

I honestly don't know what's wrong, as I have followed examples setup in a similar manner to my current code and at a glance I do not seem to do anything outlandish or different than what they do:

https://gilscvblog.com/2013/08/23/bag-of-words-models-for-visual-categorization/

http://www.morethantechnical.com/2011/08/25/a-simple-object-classifier-with-bag-of-words-using-opencv-2-3-w-code/

http://answers.opencv.org/question/27138/setting-vocabulary-in-creating-bag-of-features-to-do-svm-classification/

http://answers.opencv.org/question/24650/how-to-2-class-categolization-using-surfbowsvm/

Could anyone point the way to a complete OpenCV tutorial or exemple covering the use of BOW+SVM, or maybe point out what I might be doing wrong from my code? Thank you!

// cluster count
const int CLUSTER_COUNT = 1000;

// create global bow vocabulary with TermCriteria as per tutorial
Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("FlannBased");
Ptr<DescriptorExtractor> extractor = SurfDescriptorExtractor::create();
BOWImgDescriptorExtractor bowDE(extractor, matcher);
Ptr<SURF> detector = SURF::create(400);
BOWKMeansTrainer bowTrainer(CLUSTER_COUNT, TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 10, FLT_EPSILON), 1, KMEANS_PP_CENTERS);

//
//... some irrelevant functions for loading files and such...
//

void ComputeBow(vector<Mat> positives, vector<Mat> negatives, string vocabularySave)
{
    // get SURF descriptors and add to BOW each input files
    cout << "Acquiring descriptors, this might take a while... ";
    for (int i = 0; i < positives.size(); i++)
    {
        vector<KeyPoint> keypoints;
        detector->detect(positives[i], keypoints);
        Mat descriptors;
        extractor->compute(positives[i], keypoints, descriptors);
        if (!descriptors.empty()) bowTrainer.add(descriptors);
    }

    /*for (int i = 0; i < negatives.size(); i++)
    {
        vector<KeyPoint> keypoints;
        detector->detect(negatives[i], keypoints);
        Mat descriptors;
        extractor->compute(negatives[i], keypoints, descriptors);
        if (!descriptors.empty()) bowTrainer.add(descriptors);
    }*/
    cout << "Description complete!" << endl;

    // Create the vocabulary with KMeans.
    cout << "Clustering features, this might take a while... ";
    Mat vocabulary;
    vocabulary = bowTrainer.cluster();
    bowDE.setVocabulary(vocabulary);
    SaveVocabulary(vocabulary, vocabularySave);

    cout << "Clustering complete!" << endl;
}

void TrainSVM(vector<Mat> positiveMats, vector<Mat> negativeMats, string svmSave, string vocabularySave)
{
    //Setup the BOW
    ComputeBow(positiveMats, negativeMats, vocabularySave);

    // create training data positive and negative
    Mat train, response;
    cout << "Creating training sets, this might take a while... ";
    for (int i = 0; i < positiveMats.size(); i++)
    {
        // set training data using BOWImgDescriptorExtractor
        vector<KeyPoint> keypoints;
        detector->detect(positiveMats[i], keypoints);
        Mat descriptors;
        bowDE.compute(positiveMats[i], keypoints, descriptors);
        if (!descriptors.empty())
        {
            train.push_back(descriptors);     // update training data
            response.push_back(1);        // update response data
        }
    }

    for (int i = 0; i < negativeMats.size(); i++)
    {
        // set training data using BOWImgDescriptorExtractor
        vector<KeyPoint> keypoints;
        detector->detect(negativeMats[i], keypoints);
        Mat descriptors;
        bowDE.compute(negativeMats[i], keypoints, descriptors);
        if (!descriptors.empty())
        {
            train.push_back(descriptors);     // update training data
            response.push_back(-1);        // update response data
        }
    }

    cout << "Training... ";
    // setup svm as per tutorial values
    Ptr<SVM> svm = SVM::create();
    svm->setType(ml::SVM::C_SVC);
    svm->setKernel(ml::SVM::RBF);
    svm->setGamma(8);
    svm->setDegree(10);
    svm->setCoef0(1);
    svm->setC(10);
    svm->setNu(0.5);
    svm->setP(0.1);
    svm->setTermCriteria(cvTermCriteria(CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    svm->train(train, ROW_SAMPLE, response);
    svm->save(svmSave);

    cout << "Training complete!" << endl;
    cout << "SVM saved at " << svmSave << endl;
    cout << "Vocabulary matrix saved at " << vocabularySave << endl;
}

int Predict(string queryLoad, string svmLoad, string vocabularyLoad)
{
    // load image
    Mat img = cv::imread(queryLoad, IMREAD_GRAYSCALE);

    // load vocabulary data
    Mat vocabulary = LoadVocabulary(vocabularyLoad);

    // Set the vocabulary
    bowDE.setVocabulary(vocabulary);
    vector<KeyPoint> keypoints;
    detector->detect(img, keypoints);
    Mat descriptors;
    bowDE.compute(img, keypoints, descriptors);
    if (descriptors.empty())  return 0;

    // setup svm
    Ptr<SVM> svm = SVM::create();
    svm->load(svmLoad);
    int prediction = svm->predict(descriptors);
    cout << "Prdiction for " << queryLoad << " is: " << prediction << endl;
    return prediction;
}

int main(int argc, char** argv)
{
    //Deal with user input
    //TODO: validate file paths
    if (argc > 5 && (string)argv[1] == "/t" || (string)argv[1] == "/train")
    {
        TrainSVM(PathsToImageMats(GetFiles(argv[2])), PathsToImageMats(GetFiles(argv[3])), argv[4], argv[5]);
    }
    else if (argc < 6 && (string)argv[1] == "/p" || (string)argv[1] == "/predict")
        Predict(argv[2], argv[3], argv[4]);
    else
    {
        cout << "Accepted parameters are as follow:" << endl;
        cout << "/t positive-folder-path negative-folder-path svm-save-path vocabulary-save-path" << endl;
        cout << "/p query-image-path svm-load-path vocabulary-load-path" << endl;
    }

    system("pause");
}

Training SVM for image recognition with BOW: error on prediction

Hello!

Unhandled exception at 0x7594C41F in MachineLearningTrainerBOW.exe: Microsoft C++ exception: cv::Exception at memory location 0x0037F288.

I honestly don't know what's wrong, as I have followed examples setup in a similar manner to my current code and at a glance I do not seem to do anything outlandish or different than what they do:

https://gilscvblog.com/2013/08/23/bag-of-words-models-for-visual-categorization/

http://www.morethantechnical.com/2011/08/25/a-simple-object-classifier-with-bag-of-words-using-opencv-2-3-w-code/

http://answers.opencv.org/question/27138/setting-vocabulary-in-creating-bag-of-features-to-do-svm-classification/

http://answers.opencv.org/question/24650/how-to-2-class-categolization-using-surfbowsvm/

Could anyone point the way to a complete OpenCV tutorial or exemple covering the use of BOW+SVM, or maybe point out what I might be doing wrong from my code? Thank you!

// cluster count
const int CLUSTER_COUNT = 1000;

// create global bow vocabulary with TermCriteria as per tutorial
Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("FlannBased");
Ptr<DescriptorExtractor> extractor = SurfDescriptorExtractor::create();
BOWImgDescriptorExtractor bowDE(extractor, matcher);
Ptr<SURF> detector = SURF::create(400);
BOWKMeansTrainer bowTrainer(CLUSTER_COUNT, TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 10, FLT_EPSILON), 1, KMEANS_PP_CENTERS);

//
//... some irrelevant functions for loading files and such...
//

void ComputeBow(vector<Mat> positives, vector<Mat> negatives, string vocabularySave)
{
    // get SURF descriptors and add to BOW each input files
    cout << "Acquiring descriptors, this might take a while... ";
    for (int i = 0; i < positives.size(); i++)
    {
        vector<KeyPoint> keypoints;
        detector->detect(positives[i], keypoints);
        Mat descriptors;
        extractor->compute(positives[i], keypoints, descriptors);
        if (!descriptors.empty()) bowTrainer.add(descriptors);
    }

    /*for (int i = 0; i < negatives.size(); i++)
    {
        vector<KeyPoint> keypoints;
        detector->detect(negatives[i], keypoints);
        Mat descriptors;
        extractor->compute(negatives[i], keypoints, descriptors);
        if (!descriptors.empty()) bowTrainer.add(descriptors);
    }*/
    cout << "Description complete!" << endl;

    // Create the vocabulary with KMeans.
    cout << "Clustering features, this might take a while... ";
    Mat vocabulary;
    vocabulary = bowTrainer.cluster();
    bowDE.setVocabulary(vocabulary);
    SaveVocabulary(vocabulary, vocabularySave);

    cout << "Clustering complete!" << endl;
}

void TrainSVM(vector<Mat> positiveMats, vector<Mat> negativeMats, string svmSave, string vocabularySave)
{
    //Setup the BOW
    ComputeBow(positiveMats, negativeMats, vocabularySave);

    // create training data positive and negative
    Mat train, response;
    cout << "Creating training sets, this might take a while... ";
    for (int i = 0; i < positiveMats.size(); i++)
    {
        // set training data using BOWImgDescriptorExtractor
        vector<KeyPoint> keypoints;
        detector->detect(positiveMats[i], keypoints);
        Mat descriptors;
        bowDE.compute(positiveMats[i], keypoints, descriptors);
        if (!descriptors.empty())
        {
            train.push_back(descriptors);     // update training data
            response.push_back(1);        // update response data
        }
    }

    for (int i = 0; i < negativeMats.size(); i++)
    {
        // set training data using BOWImgDescriptorExtractor
        vector<KeyPoint> keypoints;
        detector->detect(negativeMats[i], keypoints);
        Mat descriptors;
        bowDE.compute(negativeMats[i], keypoints, descriptors);
        if (!descriptors.empty())
        {
            train.push_back(descriptors);     // update training data
            response.push_back(-1);        // update response data
        }
    }

    cout << "Training... ";
    // setup svm as per tutorial values
    Ptr<SVM> svm = SVM::create();
    svm->setType(ml::SVM::C_SVC);
    svm->setKernel(ml::SVM::RBF);
    svm->setGamma(8);
    svm->setDegree(10);
    svm->setCoef0(1);
    svm->setC(10);
    svm->setNu(0.5);
    svm->setP(0.1);
    svm->setTermCriteria(cvTermCriteria(CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    svm->train(train, ROW_SAMPLE, response);
    svm->save(svmSave);

    cout << "Training complete!" << endl;
    cout << "SVM saved at " << svmSave << endl;
    cout << "Vocabulary matrix saved at " << vocabularySave << endl;
}

int Predict(string queryLoad, string svmLoad, string vocabularyLoad)
{
    // load image
    Mat img = cv::imread(queryLoad, IMREAD_GRAYSCALE);

    // load vocabulary data
    Mat vocabulary = LoadVocabulary(vocabularyLoad);

    // Set the vocabulary
    bowDE.setVocabulary(vocabulary);
    vector<KeyPoint> keypoints;
    detector->detect(img, keypoints);
    Mat descriptors;
    bowDE.compute(img, keypoints, descriptors);
    if (descriptors.empty())  return 0;

    // setup svm
    Ptr<SVM> svm = SVM::create();
    svm->load(svmLoad);
    int prediction = svm->predict(descriptors);
    cout << "Prdiction for " << queryLoad << " is: " << prediction << endl;
    return prediction;
}

int main(int argc, char** argv)
{
    //Deal with user input
    //TODO: validate file paths
    if (argc > 5 && (string)argv[1] == "/t" || (string)argv[1] == "/train")
    {
        TrainSVM(PathsToImageMats(GetFiles(argv[2])), PathsToImageMats(GetFiles(argv[3])), argv[4], argv[5]);
    }
    else if (argc < 6 && (string)argv[1] == "/p" || (string)argv[1] == "/predict")
        Predict(argv[2], argv[3], argv[4]);
    else
    {
        cout << "Accepted parameters are as follow:" << endl;
        cout << "/t positive-folder-path negative-folder-path svm-save-path vocabulary-save-path" << endl;
        cout << "/p query-image-path svm-load-path vocabulary-load-path" << endl;
    }

    system("pause");
}

EDIT: After some Step-Into debugging, this is where it fails:

float predict( InputArray _samples, OutputArray _results, int flags ) const
{
    float result = 0;
    Mat samples = _samples.getMat(), results;
    int nsamples = samples.rows;
    bool returnDFVal = (flags & RAW_OUTPUT) != 0;

    CV_Assert( samples.cols == var_count && samples.type() == CV_32F );
   //This assert fails, number of cols (1000) is not equal to var_count (-842150451)
   //What is var_count and why is it some large negative value, i have no clue

    if( _results.needed() )
    {
        _results.create( nsamples, 1, samples.type() );
        results = _results.getMat();
    }
    else
    {
        CV_Assert( nsamples == 1 );
        results = Mat(1, 1, CV_32F, &result);
    }

    PredictBody invoker(this, samples, results, returnDFVal);
    if( nsamples < 10 )
        invoker(Range(0, nsamples));
    else
        parallel_for_(Range(0, nsamples), invoker);
    return result;
}

Training SVM for image recognition with BOW: error on prediction

Hello!

Unhandled exception at 0x7594C41F in MachineLearningTrainerBOW.exe: Microsoft C++ exception: cv::Exception at memory location 0x0037F288.

I honestly don't know what's wrong, as I have followed examples setup in a similar manner to my current code and at a glance I do not seem to do anything outlandish or different than what they do:

https://gilscvblog.com/2013/08/23/bag-of-words-models-for-visual-categorization/

http://www.morethantechnical.com/2011/08/25/a-simple-object-classifier-with-bag-of-words-using-opencv-2-3-w-code/

http://answers.opencv.org/question/27138/setting-vocabulary-in-creating-bag-of-features-to-do-svm-classification/

http://answers.opencv.org/question/24650/how-to-2-class-categolization-using-surfbowsvm/

Could anyone point the way to a complete OpenCV tutorial or exemple covering the use of BOW+SVM, or maybe point out what I might be doing wrong from my code? Thank you!

// cluster count
const int CLUSTER_COUNT = 1000;

// create global bow vocabulary with TermCriteria as per tutorial
Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("FlannBased");
Ptr<DescriptorExtractor> extractor = SurfDescriptorExtractor::create();
BOWImgDescriptorExtractor bowDE(extractor, matcher);
Ptr<SURF> detector = SURF::create(400);
BOWKMeansTrainer bowTrainer(CLUSTER_COUNT, TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 10, FLT_EPSILON), 1, KMEANS_PP_CENTERS);

//
//... some irrelevant functions for loading files and such...
//

void ComputeBow(vector<Mat> positives, vector<Mat> negatives, string vocabularySave)
{
    // get SURF descriptors and add to BOW each input files
    cout << "Acquiring descriptors, this might take a while... ";
    for (int i = 0; i < positives.size(); i++)
    {
        vector<KeyPoint> keypoints;
        detector->detect(positives[i], keypoints);
        Mat descriptors;
        extractor->compute(positives[i], keypoints, descriptors);
        if (!descriptors.empty()) bowTrainer.add(descriptors);
    }

    /*for (int i = 0; i < negatives.size(); i++)
    {
        vector<KeyPoint> keypoints;
        detector->detect(negatives[i], keypoints);
        Mat descriptors;
        extractor->compute(negatives[i], keypoints, descriptors);
        if (!descriptors.empty()) bowTrainer.add(descriptors);
    }*/
    cout << "Description complete!" << endl;

    // Create the vocabulary with KMeans.
    cout << "Clustering features, this might take a while... ";
    Mat vocabulary;
    vocabulary = bowTrainer.cluster();
    bowDE.setVocabulary(vocabulary);
    SaveVocabulary(vocabulary, vocabularySave);

    cout << "Clustering complete!" << endl;
}

void TrainSVM(vector<Mat> positiveMats, vector<Mat> negativeMats, string svmSave, string vocabularySave)
{
    //Setup the BOW
    ComputeBow(positiveMats, negativeMats, vocabularySave);

    // create training data positive and negative
    Mat train, response;
    cout << "Creating training sets, this might take a while... ";
    for (int i = 0; i < positiveMats.size(); i++)
    {
        // set training data using BOWImgDescriptorExtractor
        vector<KeyPoint> keypoints;
        detector->detect(positiveMats[i], keypoints);
        Mat descriptors;
        bowDE.compute(positiveMats[i], keypoints, descriptors);
        if (!descriptors.empty())
        {
            train.push_back(descriptors);     // update training data
            response.push_back(1);        // update response data
        }
    }

    for (int i = 0; i < negativeMats.size(); i++)
    {
        // set training data using BOWImgDescriptorExtractor
        vector<KeyPoint> keypoints;
        detector->detect(negativeMats[i], keypoints);
        Mat descriptors;
        bowDE.compute(negativeMats[i], keypoints, descriptors);
        if (!descriptors.empty())
        {
            train.push_back(descriptors);     // update training data
            response.push_back(-1);        // update response data
        }
    }

    cout << "Training... ";
    // setup svm as per tutorial values
    Ptr<SVM> svm = SVM::create();
    svm->setType(ml::SVM::C_SVC);
    svm->setKernel(ml::SVM::RBF);
    svm->setGamma(8);
    svm->setDegree(10);
    svm->setCoef0(1);
    svm->setC(10);
    svm->setNu(0.5);
    svm->setP(0.1);
    svm->setTermCriteria(cvTermCriteria(CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    svm->train(train, ROW_SAMPLE, response);
    svm->save(svmSave);

    cout << "Training complete!" << endl;
    cout << "SVM saved at " << svmSave << endl;
    cout << "Vocabulary matrix saved at " << vocabularySave << endl;
}

int Predict(string queryLoad, string svmLoad, string vocabularyLoad)
{
    // load image
    Mat img = cv::imread(queryLoad, IMREAD_GRAYSCALE);

    // load vocabulary data
    Mat vocabulary = LoadVocabulary(vocabularyLoad);

    // Set the vocabulary
    bowDE.setVocabulary(vocabulary);
    vector<KeyPoint> keypoints;
    detector->detect(img, keypoints);
    Mat descriptors;
    bowDE.compute(img, keypoints, descriptors);
    if (descriptors.empty())  return 0;

    // setup svm
    Ptr<SVM> svm = SVM::create();
    svm->load(svmLoad);
    int prediction = svm->predict(descriptors);
    cout << "Prdiction for " << queryLoad << " is: " << prediction << endl;
    return prediction;
}

int main(int argc, char** argv)
{
    //Deal with user input
    //TODO: validate file paths
    if (argc > 5 && (string)argv[1] == "/t" || (string)argv[1] == "/train")
    {
        TrainSVM(PathsToImageMats(GetFiles(argv[2])), PathsToImageMats(GetFiles(argv[3])), argv[4], argv[5]);
    }
    else if (argc < 6 && (string)argv[1] == "/p" || (string)argv[1] == "/predict")
        Predict(argv[2], argv[3], argv[4]);
    else
    {
        cout << "Accepted parameters are as follow:" << endl;
        cout << "/t positive-folder-path negative-folder-path svm-save-path vocabulary-save-path" << endl;
        cout << "/p query-image-path svm-load-path vocabulary-load-path" << endl;
    }

    system("pause");
}

EDIT: After some Step-Into debugging, this is where it fails:

float predict( InputArray _samples, OutputArray _results, int flags ) const
{
    float result = 0;
    Mat samples = _samples.getMat(), results;
    int nsamples = samples.rows;
    bool returnDFVal = (flags & RAW_OUTPUT) != 0;

    CV_Assert( **CV_Assert( samples.cols == var_count && samples.type() == CV_32F );
);**
   //This assert fails, number of cols (1000) is not equal to var_count (-842150451)
   //What is var_count and why is it some large negative value, i have no clue

    if( _results.needed() )
    {
        _results.create( nsamples, 1, samples.type() );
        results = _results.getMat();
    }
    else
    {
        CV_Assert( nsamples == 1 );
        results = Mat(1, 1, CV_32F, &result);
    }

    PredictBody invoker(this, samples, results, returnDFVal);
    if( nsamples < 10 )
        invoker(Range(0, nsamples));
    else
        parallel_for_(Range(0, nsamples), invoker);
    return result;
}

Training SVM for image recognition with BOW: error on prediction

Hello!

Unhandled exception at 0x7594C41F in MachineLearningTrainerBOW.exe: Microsoft C++ exception: cv::Exception at memory location 0x0037F288.

I honestly don't know what's wrong, as I have followed examples setup in a similar manner to my current code and at a glance I do not seem to do anything outlandish or different than what they do:

https://gilscvblog.com/2013/08/23/bag-of-words-models-for-visual-categorization/

http://www.morethantechnical.com/2011/08/25/a-simple-object-classifier-with-bag-of-words-using-opencv-2-3-w-code/

http://answers.opencv.org/question/27138/setting-vocabulary-in-creating-bag-of-features-to-do-svm-classification/

http://answers.opencv.org/question/24650/how-to-2-class-categolization-using-surfbowsvm/

Could anyone point the way to a complete OpenCV tutorial or exemple covering the use of BOW+SVM, or maybe point out what I might be doing wrong from my code? Thank you!

// cluster count
const int CLUSTER_COUNT = 1000;

// create global bow vocabulary with TermCriteria as per tutorial
Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("FlannBased");
Ptr<DescriptorExtractor> extractor = SurfDescriptorExtractor::create();
BOWImgDescriptorExtractor bowDE(extractor, matcher);
Ptr<SURF> detector = SURF::create(400);
BOWKMeansTrainer bowTrainer(CLUSTER_COUNT, TermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 10, FLT_EPSILON), 1, KMEANS_PP_CENTERS);

//
//... some irrelevant functions for loading files and such...
//

void ComputeBow(vector<Mat> positives, vector<Mat> negatives, string vocabularySave)
{
    // get SURF descriptors and add to BOW each input files
    cout << "Acquiring descriptors, this might take a while... ";
    for (int i = 0; i < positives.size(); i++)
    {
        vector<KeyPoint> keypoints;
        detector->detect(positives[i], keypoints);
        Mat descriptors;
        extractor->compute(positives[i], keypoints, descriptors);
        if (!descriptors.empty()) bowTrainer.add(descriptors);
    }

    /*for (int i = 0; i < negatives.size(); i++)
    {
        vector<KeyPoint> keypoints;
        detector->detect(negatives[i], keypoints);
        Mat descriptors;
        extractor->compute(negatives[i], keypoints, descriptors);
        if (!descriptors.empty()) bowTrainer.add(descriptors);
    }*/
    cout << "Description complete!" << endl;

    // Create the vocabulary with KMeans.
    cout << "Clustering features, this might take a while... ";
    Mat vocabulary;
    vocabulary = bowTrainer.cluster();
    bowDE.setVocabulary(vocabulary);
    SaveVocabulary(vocabulary, vocabularySave);

    cout << "Clustering complete!" << endl;
}

void TrainSVM(vector<Mat> positiveMats, vector<Mat> negativeMats, string svmSave, string vocabularySave)
{
    //Setup the BOW
    ComputeBow(positiveMats, negativeMats, vocabularySave);

    // create training data positive and negative
    Mat train, response;
    cout << "Creating training sets, this might take a while... ";
    for (int i = 0; i < positiveMats.size(); i++)
    {
        // set training data using BOWImgDescriptorExtractor
        vector<KeyPoint> keypoints;
        detector->detect(positiveMats[i], keypoints);
        Mat descriptors;
        bowDE.compute(positiveMats[i], keypoints, descriptors);
        if (!descriptors.empty())
        {
            train.push_back(descriptors);     // update training data
            response.push_back(1);        // update response data
        }
    }

    for (int i = 0; i < negativeMats.size(); i++)
    {
        // set training data using BOWImgDescriptorExtractor
        vector<KeyPoint> keypoints;
        detector->detect(negativeMats[i], keypoints);
        Mat descriptors;
        bowDE.compute(negativeMats[i], keypoints, descriptors);
        if (!descriptors.empty())
        {
            train.push_back(descriptors);     // update training data
            response.push_back(-1);        // update response data
        }
    }

    cout << "Training... ";
    // setup svm as per tutorial values
    Ptr<SVM> svm = SVM::create();
    svm->setType(ml::SVM::C_SVC);
    svm->setKernel(ml::SVM::RBF);
    svm->setGamma(8);
    svm->setDegree(10);
    svm->setCoef0(1);
    svm->setC(10);
    svm->setNu(0.5);
    svm->setP(0.1);
    svm->setTermCriteria(cvTermCriteria(CV_TERMCRIT_EPS, 1000, FLT_EPSILON));
    svm->train(train, ROW_SAMPLE, response);
    svm->save(svmSave);

    cout << "Training complete!" << endl;
    cout << "SVM saved at " << svmSave << endl;
    cout << "Vocabulary matrix saved at " << vocabularySave << endl;
}

int Predict(string queryLoad, string svmLoad, string vocabularyLoad)
{
    // load image
    Mat img = cv::imread(queryLoad, IMREAD_GRAYSCALE);

    // load vocabulary data
    Mat vocabulary = LoadVocabulary(vocabularyLoad);

    // Set the vocabulary
    bowDE.setVocabulary(vocabulary);
    vector<KeyPoint> keypoints;
    detector->detect(img, keypoints);
    Mat descriptors;
    bowDE.compute(img, keypoints, descriptors);
    if (descriptors.empty())  return 0;

    // setup svm
    Ptr<SVM> svm = SVM::create();
    svm->load(svmLoad);
    int prediction = svm->predict(descriptors);
    cout << "Prdiction for " << queryLoad << " is: " << prediction << endl;
    return prediction;
}

int main(int argc, char** argv)
{
    //Deal with user input
    //TODO: validate file paths
    if (argc > 5 && (string)argv[1] == "/t" || (string)argv[1] == "/train")
    {
        TrainSVM(PathsToImageMats(GetFiles(argv[2])), PathsToImageMats(GetFiles(argv[3])), argv[4], argv[5]);
    }
    else if (argc < 6 && (string)argv[1] == "/p" || (string)argv[1] == "/predict")
        Predict(argv[2], argv[3], argv[4]);
    else
    {
        cout << "Accepted parameters are as follow:" << endl;
        cout << "/t positive-folder-path negative-folder-path svm-save-path vocabulary-save-path" << endl;
        cout << "/p query-image-path svm-load-path vocabulary-load-path" << endl;
    }

    system("pause");
}

EDIT: After some Step-Into debugging, this is where it fails:

float predict( InputArray _samples, OutputArray _results, int flags ) const
{
    float result = 0;
    Mat samples = _samples.getMat(), results;
    int nsamples = samples.rows;
    bool returnDFVal = (flags & RAW_OUTPUT) != 0;

    **CV_Assert( CV_Assert( samples.cols == var_count && samples.type() == CV_32F );**
);
   //This assert fails, number of cols (1000) is not equal to var_count (-842150451)
   //What is var_count and why is it some large negative value, i have no clue

    if( _results.needed() )
    {
        _results.create( nsamples, 1, samples.type() );
        results = _results.getMat();
    }
    else
    {
        CV_Assert( nsamples == 1 );
        results = Mat(1, 1, CV_32F, &result);
    }

    PredictBody invoker(this, samples, results, returnDFVal);
    if( nsamples < 10 )
        invoker(Range(0, nsamples));
    else
        parallel_for_(Range(0, nsamples), invoker);
    return result;
}