There are a lot of ways to do that...
One way it can be done is like at http://nghiaho.com/uploads/code/videostab.cpp
(frame processing snippet)
cap >> cur;
if(cur.data == NULL) {
break;
}
cvtColor(cur, cur_grey, COLOR_BGR2GRAY);
// vector from prev to cur
vector <Point2f> prev_corner, cur_corner;
vector <Point2f> prev_corner2, cur_corner2;
vector <uchar> status;
vector <float> err;
goodFeaturesToTrack(prev_grey, prev_corner, 200, 0.01, 30);
calcOpticalFlowPyrLK(prev_grey, cur_grey, prev_corner, cur_corner, status, err);
// weed out bad matches
for(size_t i=0; i < status.size(); i++) {
if(status[i]) {
prev_corner2.push_back(prev_corner[i]);
cur_corner2.push_back(cur_corner[i]);
}
}
// translation + rotation only
Mat T = estimateRigidTransform(prev_corner2, cur_corner2, false); // false = rigid transform, no scaling/shearing
// in rare cases no transform is found. We'll just use the last known good transform.
if(T.data == NULL) {
last_T.copyTo(T);
}
T.copyTo(last_T);
// decompose T
double dx = T.at<double>(0,2);
double dy = T.at<double>(1,2);
double da = atan2(T.at<double>(1,0), T.at<double>(0,0));
prev_to_cur_transform.push_back(TransformParam(dx, dy, da));
out_transform << k << " " << dx << " " << dy << " " << da << endl;
cur.copyTo(prev);
cur_grey.copyTo(prev_grey);
cout << "Frame: " << k << "/" << max_frames << " - good optical flow: " << prev_corner2.size() << endl;
k++;
(snippet)
Another way that is very robust is to take a few e.g. 64x64 gray blocks of the image at fixed positions
and correlate them with the following frame. The position of the maximum value of the resulting matrix corresponds to the translation.
You might want to apply lens distortion before if you want to have real nice results and you're "zoomed out".
W.