Head pose estimation fails with specific image sizes

I want to find the rotation angles of the head using OpenCV and dlib, so I tried this code from the tutorial:

#include <cmath>
#include <dlib/opencv.h>
#include <dlib/image_processing.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <opencv2/opencv.hpp>

using namespace dlib;
using namespace std;

cv::Mat im = cv::imread("img.jpg");

// Face detector and 68-point landmark predictor
frontal_face_detector detector = get_frontal_face_detector();
shape_predictor sp;
deserialize("shape_predictor_68_face_landmarks.dat") >> sp;

matrix<bgr_pixel> dlibImage;
assign_image(dlibImage, cv_image<bgr_pixel>(im));

// Take the first detected face (assumes at least one face is found)
auto face = detector(dlibImage)[0];
auto shape = sp(dlibImage, face);

// 2D image points.
std::vector<cv::Point2d> image_points;
image_points.push_back(cv::Point2d(shape.part(30).x(), shape.part(30).y()));    // Nose tip
image_points.push_back(cv::Point2d(shape.part(8).x(), shape.part(8).y()));    // Chin
image_points.push_back(cv::Point2d(shape.part(36).x(), shape.part(36).y()));     // Left eye left corner
image_points.push_back(cv::Point2d(shape.part(45).x(), shape.part(45).y()));    // Right eye right corner
image_points.push_back(cv::Point2d(shape.part(48).x(), shape.part(48).y()));    // Left Mouth corner
image_points.push_back(cv::Point2d(shape.part(54).x(), shape.part(54).y()));    // Right mouth corner


// 3D model points.
std::vector<cv::Point3d> model_points;
model_points.push_back(cv::Point3d(0.0f, 0.0f, 0.0f));               // Nose tip
model_points.push_back(cv::Point3d(0.0f, -330.0f, -65.0f));          // Chin
model_points.push_back(cv::Point3d(-225.0f, 170.0f, -135.0f));       // Left eye left corner
model_points.push_back(cv::Point3d(225.0f, 170.0f, -135.0f));        // Right eye right corner
model_points.push_back(cv::Point3d(-150.0f, -150.0f, -125.0f));      // Left Mouth corner
model_points.push_back(cv::Point3d(150.0f, -150.0f, -125.0f));       // Right mouth corner

// Camera internals
double focal_length = im.cols; // Approximate focal length in pixels
cv::Point2d center = cv::Point2d(im.cols/2.0, im.rows/2.0);
cv::Mat camera_matrix = (cv::Mat_<double>(3,3) << focal_length, 0, center.x, 0, focal_length, center.y, 0, 0, 1);
cv::Mat dist_coeffs = cv::Mat::zeros(4, 1, CV_64F); // Assuming no lens distortion

cout << "Camera Matrix " << endl << camera_matrix << endl ;
// Output rotation and translation
cv::Mat rotation_vector; // Rotation in axis-angle form

cv::Mat translation_vector;

// Solve for pose
cv::solvePnP(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector);


// Project a 3D point (0, 0, 1000.0) onto the image plane.
// We use this to draw a line sticking out of the nose

std::vector<cv::Point3d> nose_end_point3D;
std::vector<cv::Point2d> nose_end_point2D;
nose_end_point3D.push_back(cv::Point3d(0, 0, 1000.0));

cv::projectPoints(nose_end_point3D, rotation_vector, translation_vector, camera_matrix, dist_coeffs, nose_end_point2D);


for (size_t i = 0; i < image_points.size(); i++)
{
    cv::circle(im, image_points[i], 3, cv::Scalar(0,0,255), -1);
}

cv::line(im, image_points[0], nose_end_point2D[0], cv::Scalar(255,0,0), 2);

cout << "Rotation Vector " << endl << rotation_vector << endl;
cout << "Translation Vector" << endl << translation_vector << endl;

cout <<  nose_end_point2D << endl;

// Display image.
cv::imshow("Output", im);
cv::waitKey(0);

But, unfortunately, I get completely different results depending on the size of the same image!

If I use this img.jpg at 299x299 px (many sizes work fine; this is just the nearest one that does), then everything is OK and I get the correct result:

(screenshot of the correct result)

Output:

Rotation Vector 
[-0.04450161828760668;
 -2.133664002574712;
 -0.2208024002827168]

But if I use the same img.jpg resized to 298x298 px, then I get a completely wrong result:

(screenshot of the wrong result)

Output:

Rotation Vector 
[-2.999117288644056;
 0.0777816930911016;
 -0.7573144061217354]

I have also established that the problem comes from the landmark coordinates, not from the image size itself: with the same hardcoded landmarks, the result is identical even when the image size differs.
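
To quantify how far off a bad solution is, here is a rough sketch of a check I can add (it reuses the variables from the code above; treating the reprojection error as a quality signal is my own assumption, not part of the tutorial):

std::vector<cv::Point2d> reprojected;
cv::projectPoints(model_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs, reprojected);

// RMS pixel distance between the detected landmarks and the reprojected
// model points; a large value suggests solvePnP converged to a bad local minimum
double rms = cv::norm(image_points, reprojected, cv::NORM_L2) / std::sqrt((double)image_points.size());
cout << "RMS reprojection error: " << rms << " px" << endl;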

How can I always get a correct pose estimation, as in the first case?

P.S. I also want to note that this problem behaves very erratically: right now everything is OK at 298x298, but I get a wrong result at 297x297.

UPD:

I get a wrong result with these image_points:

image_points.push_back(cv::Point2d(245, 132));
image_points.push_back(cv::Point2d(220, 263));
image_points.push_back(cv::Point2d(136, 76));
image_points.push_back(cv::Point2d(259, 74));
image_points.push_back(cv::Point2d(172, 196));
image_points.push_back(cv::Point2d(254, 191));

And a correct result with these:

image_points.push_back(cv::Point2d(244, 134));
image_points.push_back(cv::Point2d(220, 264));
image_points.push_back(cv::Point2d(137, 74));
image_points.push_back(cv::Point2d(260, 73));
image_points.push_back(cv::Point2d(173, 197));
image_points.push_back(cv::Point2d(256, 192));
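
For completeness, here is a minimal sketch of how the two hardcoded sets can be compared side by side (assuming the same model_points, camera_matrix, and dist_coeffs as in the code above, built for the 299x299 image):

std::vector<std::vector<cv::Point2d>> candidates = {
    { {245, 132}, {220, 263}, {136, 76}, {259, 74}, {172, 196}, {254, 191} },  // gives the wrong result
    { {244, 134}, {220, 264}, {137, 74}, {260, 73}, {173, 197}, {256, 192} }   // gives the correct result
};

for (const auto& pts : candidates)
{
    cv::Mat rvec, tvec;
    cv::solvePnP(model_points, pts, camera_matrix, dist_coeffs, rvec, tvec);

    std::vector<cv::Point2d> reproj;
    cv::projectPoints(model_points, rvec, tvec, camera_matrix, dist_coeffs, reproj);

    // Print the recovered rotation and how well it reprojects
    cout << "rvec: " << rvec.t()
         << "  RMS error: " << cv::norm(pts, reproj, cv::NORM_L2) / std::sqrt((double)pts.size())
         << endl;
}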