Hi everyone,
I am trying to train a Facemark Kazemi model. I am following this guide and sample code.
Is there any way to train the model where I can avoid loading all training images at once as they do in the sample and guide? I run out of memory after creating Mats for ~300 of the 2000 images in the HELEN training dataset :(
EDIT:
I was able to load all training data using a 64bit process as StevenPuttemans suggested!
I trained with the 2000 image training set from the HELEN dataset and the model ended up being 39.5MB. The training took about 1.5 days to complete.
Although the model seemed to train okay, I am not getting very good results and, when I try to detect landmarks in real-time (video/webcam stream), the model is very slow :( The results I get using the LBF facemark class and its pretrained model are far better in terms of speed and accuracy. This makes me nervous that I did something incorrectly when training the Kazemi model.
One possible problem I noticed was that, when training the model, I get a message saying "[ INFO:0] Initialize OpenCL runtime..." rather than the "Training with 3080 samples" message which the tutorial lists. I also get this "[ INFO:0] Initialize OpenCL runtime..." message when loading the model I created. Is this message signaling that something is wrong with how I trained the model? I don't receive this message when using the LBF and AAM facemark classes. Furthermore, did the author of the tutorial use 3080 training images rather than 2000 training images? If anyone sees how I can improve my model's accuracy/speed please let me know!
Here is my training code. I use this sample config file and the haarcascade_frontalface_default:
#include "opencv2/face.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/objdetect.hpp"
#include <iostream>
#include <vector>
#include <string>
using namespace std;
using namespace cv;
using namespace cv::face;
// Custom face detector callback handed to FacemarkKazemi::setFaceDetector.
// Detects faces in `image` with the given Haar cascade and writes the
// bounding rectangles into `faces`. Always reports success.
static bool myDetector(InputArray image, OutputArray faces, CascadeClassifier *face_cascade)
{
    // Cascade classifiers operate on single-channel input, so build a
    // grayscale working copy first.
    Mat gray;
    if (image.channels() == 1)
        gray = image.getMat().clone();
    else
        cvtColor(image, gray, COLOR_BGR2GRAY);

    // Boost contrast before detection; helps the Haar features.
    equalizeHist(gray, gray);

    std::vector<Rect> detections;
    face_cascade->detectMultiScale(gray, detections, 1.4, 2, CASCADE_SCALE_IMAGE, Size(30, 30));

    // Hand the rectangles back through the OutputArray interface.
    Mat(detections).copyTo(faces);
    return true;
}
// Trains a FacemarkKazemi model from an image list + landmark annotations,
// using a Haar cascade as the face detector. Writes the trained model to
// `model.dat`. Returns 0 on success, 1 on any setup/loading failure.
int main(int argc, char** argv) {
    string annotations = "annotations.txt";
    string imagesList = "images.txt";
    string configfile_name = "sample_config_file.xml";
    string modelfile_name = "model.dat";
    string cascade_name = "haarcascade_frontalface_default.xml";
    // Training images are rescaled to this size by the trainer.
    Size scale(460, 460);

    CascadeClassifier face_cascade;
    // BUG FIX: the original ignored load()'s result — a missing cascade file
    // silently produced zero detections during training.
    if (!face_cascade.load(cascade_name)) {
        cerr << "Failed to load cascade: " << cascade_name << endl;
        return 1;
    }

    FacemarkKazemi::Params params;
    params.configfile = configfile_name;
    Ptr<FacemarkKazemi> facemark = FacemarkKazemi::create(params);
    facemark->setFaceDetector((FN_FaceDetector)myDetector, &face_cascade);

    std::vector<String> images;
    std::vector<std::vector<Point2f> > facePoints;
    // BUG FIX: check that the image/annotation lists actually loaded.
    if (!loadTrainingData(imagesList, annotations, images, facePoints, 0.0)) {
        cerr << "Failed to load training data from " << imagesList
             << " / " << annotations << endl;
        return 1;
    }

    vector<Mat> Trainimages;
    std::vector<std::vector<Point2f> > Trainlandmarks;
    for (unsigned long i = 0; i < images.size(); i++) {
        // Declared in-loop so each iteration owns a fresh Mat.
        Mat src = imread(images.at(i));
        std::cout << "Image " << i << " " << src.rows << " " << src.cols << endl;
        if (src.empty()) {
            // BUG FIX: the original message said "Aborting" but the code
            // skipped the image and continued; the message now matches.
            cerr << "Image not found: " << images.at(i) << ". Skipping..." << endl;
            continue;
        }
        std::cout << "Annotation " << i << " " << facePoints.at(i).size() << endl;
        // Image and its landmarks are pushed together to keep them aligned.
        Trainimages.push_back(src);
        Trainlandmarks.push_back(facePoints.at(i));
    }

    // BUG FIX: don't attempt training with an empty set (every imread failed).
    if (Trainimages.empty()) {
        cerr << "No training images could be loaded. Aborting." << endl;
        return 1;
    }

    cout << "Got data" << endl;
    facemark->training(Trainimages, Trainlandmarks, configfile_name, scale, modelfile_name);
    cout << "Training complete" << endl;
    return 0;
}