Java and Haar cascade face and mouth detection - nose detected as the mouth
Today I began testing a project that detects a smile in Java and OpenCV. To recognize the face and mouth, the project uses haarcascade_frontalface_alt and haarcascade_mcs_mouth, but I don't understand why in some cases the project detects the nose as a mouth. I have two methods:
private ArrayList<Mat> detectMouth(String filename) {
    int i = 0;
    ArrayList<Mat> mouths = new ArrayList<Mat>();
    // read the image in grayscale from the given path
    image = Highgui.imread(filename, Highgui.CV_LOAD_IMAGE_GRAYSCALE);
    MatOfRect faceDetections = new MatOfRect();
    // detect face(s) on the image and save them to a MatOfRect object
    faceDetector.detectMultiScale(image, faceDetections);
    System.out.println(String.format("Detected %s faces", faceDetections.toArray().length));
    MatOfRect mouthDetections = new MatOfRect();
    // detect mouth(s) on the image and save them to a MatOfRect object
    mouthDetector.detectMultiScale(image, mouthDetections);
    System.out.println(String.format("Detected %s mouths", mouthDetections.toArray().length));
    for (Rect face : faceDetections.toArray()) {
        Mat outFace = image.submat(face);
        // save the cropped face to a picture
        Highgui.imwrite("face" + i + ".png", outFace);
        for (Rect mouth : mouthDetections.toArray()) {
            // trying to find the right mouth:
            // it must start in the lower 2/5 of the face,
            // its lower edge must lie above the bottom of the face,
            // and its horizontal center must be close to the horizontal center of the face
            if (mouth.y > face.y + face.height * 3 / 5 && mouth.y + mouth.height < face.y + face.height
                    && Math.abs((mouth.x + mouth.width / 2) - (face.x + face.width / 2)) < face.width / 10) {
                Mat outMouth = image.submat(mouth);
                // resize the mouth to the unified size trainSize
                Imgproc.resize(outMouth, outMouth, trainSize);
                mouths.add(outMouth);
                // save the mouth to a picture
                Highgui.imwrite("mouth" + i + ".png", outMouth);
                i++;
            }
        }
    }
    return mouths;
}
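For reference, the faceDetector and mouthDetector fields used in detectMouth are not shown in the question; presumably they are CascadeClassifier instances loaded from the two cascade files, roughly like this (the file paths are my assumption):

// assumed initialization of the two detectors used in detectMouth()
// (org.opencv.objdetect.CascadeClassifier; the native library must already be
// loaded via System.loadLibrary(Core.NATIVE_LIBRARY_NAME); paths are placeholders)
private CascadeClassifier faceDetector = new CascadeClassifier("haarcascade_frontalface_alt.xml");
private CascadeClassifier mouthDetector = new CascadeClassifier("haarcascade_mcs_mouth.xml");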
The second method detects the smile:
private void detectSmile(ArrayList<Mat> mouths) {
    trainSVM();
    CvSVMParams params = new CvSVMParams();
    // use a linear kernel (no mapping, the separation is done in the original feature space)
    params.set_kernel_type(CvSVM.LINEAR);
    // train the SVM on trainingImages / trainingLabels; the two empty Mats are varIdx and sampleIdx
    clasificador = new CvSVM(trainingImages, trainingLabels, new Mat(), new Mat(), params);
    // save the generated SVM to a file, so we can see what it generated
    clasificador.save("svm.xml");
    // load the previously saved file
    clasificador.load("svm.xml");
    // return if there are no samples
    if (mouths.isEmpty()) {
        System.out.println("No mouth detected");
        return;
    }
    for (Mat mouth : mouths) {
        Mat out = new Mat();
        // convert to 32-bit floating point grayscale, scaled to [0, 1] to match the training preprocessing
        mouth.convertTo(out, CvType.CV_32FC1, 1.0 / 255.0);
        if (clasificador.predict(out.reshape(1, 1)) == 1.0) {
            System.out.println("Detected happy face");
        } else {
            System.out.println("Detected not a happy face");
        }
    }
}
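For completeness, these two methods are presumably called one after another on a test image, something like this (the file name is a placeholder):

// assumed usage: find mouth candidates first, then classify each one
ArrayList<Mat> mouths = detectMouth("test.png");
detectSmile(mouths);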
And the training method:
private void train(String flag) {
    String path;
    if (flag.equalsIgnoreCase("positive")) {
        path = trainPath + "smile/";
    } else {
        path = trainPath + "neutral/";
    }
    for (File file : new File(path).listFiles()) {
        Mat img = new Mat();
        // read the training image in grayscale and scale it to 32-bit floats in [0, 1]
        Mat con = Highgui.imread(file.getAbsolutePath(), Highgui.CV_LOAD_IMAGE_GRAYSCALE);
        con.convertTo(img, CvType.CV_32FC1, 1.0 / 255.0);
        Imgproc.resize(img, img, trainSize);
        // every training sample is one flattened row; label 1 = smile, 0 = neutral
        trainingImages.push_back(img.reshape(1, 1));
        if (flag.equalsIgnoreCase("positive")) {
            trainingLabels.push_back(Mat.ones(new Size(1, 1), CvType.CV_32FC1));
        } else {
            trainingLabels.push_back(Mat.zeros(new Size(1, 1), CvType.CV_32FC1));
        }
    }
}
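The trainSVM() call at the top of detectSmile is not shown in the question; judging from train(String flag), it presumably just fills trainingImages and trainingLabels for both classes, something like this (an assumption):

private void trainSVM() {
    // assumed: load the positive (smile) and neutral training folders
    train("positive");
    train("neutral"); // any flag other than "positive" selects the neutral folder
}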
Examples: for some pictures the mouth is detected correctly, but for others the nose region is marked as the mouth (the example images are not included here).
Ahh, the world of computer vision, where visual agreement is not the same as how patches are represented inside the feature space. Take a look at this MIT research, and you will quickly discover why an image that was never a mouth in the first place can still be classified as a mouth by a model :)
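To make that concrete: with a linear kernel, the classifier above reduces to a single dot product over the flattened pixel vector. It has no notion of "mouth" or "nose", only of which side of a hyperplane the pixel values fall on. A minimal sketch of that decision rule, where w and b stand for the learned weights and bias (not the project's actual code):

// conceptual sketch only: a linear SVM labels a flattened patch x purely by sign(w . x + b)
static boolean classifiedAsSmile(float[] x, float[] w, float b) {
    double score = b;
    for (int j = 0; j < x.length; j++) {
        score += w[j] * x[j]; // dot product in raw pixel space
    }
    // a nose crop whose pixel vector lands on the positive side is labelled "smile",
    // regardless of what it looks like to a human
    return score > 0;
}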