
Revision history

revision 1 (initial version)

Getting to grips with video stabilisation in Python

I've been trying to get to grips with video stabilisation using OpenCV and Python. The code below runs, and successfully keeps track of points from frame to frame. However, my attempts to apply the offset between frames to prevent jitter fail completely - not with an error, just without any obviously useful effect.

I suspect I am doing something very obviously wrong here, but am getting square eyes, and would appreciate any guidance!

import numpy as np
import cv2
import sys

vid=sys.argv[1]
border_crop=10
show_points=True
inter_frame_delay=20

cap = cv2.VideoCapture(vid)

# Parameters for Shi-Tomasi corner detection
feature_params = dict( maxCorners = 50,
                       qualityLevel = 0.3,
                       minDistance = 7,
                       blockSize = 7 )

# Parameters for Lucas-Kanade optical flow
lk_params = dict( winSize  = (15,15),
                  maxLevel = 4,
                  criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

# Take first frame and find corners in it
ret, old_frame = cap.read()
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)

rows,cols = old_gray.shape
print "Video resolution: "+str(cols)+"*"+str(rows)
raw_input("Press Enter to continue...")

points_to_track = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)

print "Trackable points detected in first frame:"
print points_to_track

frame_mask = np.zeros_like(old_frame)

while(1):
    ret,frame = cap.read()
    if not ret:
        break

    #Convert the new frame to greyscale
    new_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    #calculate optical flow between the latest frame (new_gray) and the last one we examined
    print "Searching for optical flow between this frame and the last..."
    new_points, matched, err = cv2.calcOpticalFlowPyrLK(old_gray, new_gray, points_to_track, None, **lk_params)

    # Select good tracked points - matched==1 if the point has been found in the new frame
    new = new_points[matched==1]
    old = points_to_track[matched==1]

    #Cast to int - may not be required, but we are dealing with pixel coordinates...
    new=new.astype(int)
    old=old.astype(int)

    print "Old point coordinates:"
    print old

    print "New point coordinates:"
    print new
    delta=new-old

    print "Succesfully tracked points delta:"
    print delta
    print "Average shift:"
    print "Tracked points: "+str(len(delta))

    mean_shift=np.mean(delta,axis=0)
    mean_shift=mean_shift.astype(int)

    print "Mean shift:"
    print mean_shift
    x_shift=1*mean_shift[0]
    y_shift=1*mean_shift[1]
    print "Adjusting: X:"+str(x_shift)+",Y:"+str(y_shift)

    transformation_matrix = np.float32([[1,0,x_shift],[0,1,y_shift]])
    stabilized_frame = cv2.warpAffine(frame,transformation_matrix,(cols,rows))
    cropped_stabilized_frame = stabilized_frame[border_crop:rows-border_crop, border_crop:cols-border_crop]

    if show_points:
        for point in new:
            corner_x=point[0]
            corner_y=point[1]
            frame = cv2.circle(frame,(corner_x,corner_y),5,(0,0,0),-1)

        for point in old:
            corner_x=point[0]
            corner_y=point[1]
            frame = cv2.circle(frame,(corner_x,corner_y),5,(255,255,255),-1)

    cv2.imshow('original frame',frame)
    cv2.imshow('stabilised frame',stabilized_frame)
    cv2.imshow('cropped stabilised frame',cropped_stabilized_frame)
    cv2.waitKey(inter_frame_delay)

    old_gray = new_gray.copy()
    points_to_track = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)

raw_input("Press Enter to continue...")
cv2.destroyAllWindows()
cap.release()
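
A note on what the per-frame shift does: warping the new frame by the measured (new - old) delta pushes its content further in the direction it already moved, and it also ignores any drift accumulated since the first frame. The usual pattern is to accumulate the deltas into a camera trajectory, smooth that trajectory, and warp each frame by the difference between the smoothed and the actual path. A minimal sketch of that idea, assuming the per-frame (dx, dy) mean shifts are collected in a list - the names here are illustrative, not from the code above:

import numpy as np

def stabilising_shift(deltas, window=30):
    # deltas: per-frame (dx, dy) mean shifts, oldest first
    trajectory = np.cumsum(np.asarray(deltas, dtype=np.float32), axis=0)
    smoothed = trajectory[-window:].mean(axis=0)   # moving average of the camera path
    return smoothed - trajectory[-1]               # corrective (dx, dy) for the latest frame

The returned pair would then replace (x_shift, y_shift) in the 2x3 translation matrix above.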

revision 2

import numpy as np
import cv2
import sys

vid=sys.argv[1]
border_crop=10
show_points=True
inter_frame_delay=20

cap = cv2.VideoCapture(vid)

# Parameters for Shi-Tomasi corner detection
feature_params = dict( maxCorners = 50,
                       qualityLevel = 0.3,
                       minDistance = 7,
                       blockSize = 7 )

# Parameters for Lucas-Kanade optical flow
lk_params = dict( winSize  = (15,15),
                  maxLevel = 4,
                  criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

# Take first frame and find corners in it
ret, old_frame = cap.read()
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
# Seed the running average with the identity (frame mapped to itself)
transformation_matrix_avg = cv2.estimateRigidTransform(old_frame, old_frame, False)


rows,cols = old_gray.shape
print "Video resolution: "+str(cols)+"*"+str(rows)
raw_input("Press Enter to continue...")

points_to_track = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)

print "Trackable points detected in first frame:"
print points_to_track

frame_mask = np.zeros_like(old_frame)

while(1):
    ret,frame = cap.read()
    if not ret:
        break

    #Convert the new frame to greyscale
    new_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    #calculate optical flow between the latest frame (new_gray) and the last one we examined
    print "Searching for optical flow between this frame and the last..."
    new_points, matched, err = cv2.calcOpticalFlowPyrLK(old_gray, new_gray, points_to_track, None, **lk_params)

    # Select good tracked points - matched==1 if the point has been found in the new frame
    new = new_points[matched==1]
    old = points_to_track[matched==1]

    #Cast to int - may not be required, but we are dealing with pixel coordinates...
    new=new.astype(int)
    old=old.astype(int)

    print "Old point coordinates:"
    print old

    print "New point coordinates:"
    print new
    delta=new-old

    print "Succesfully tracked points delta:"
    print delta
    print "Average shift:"
    print "Tracked points: "+str(len(delta))

    mean_shift=np.mean(delta,axis=0)
    mean_shift=mean_shift.astype(int)

    print "Mean shift:"
    print mean_shift
    x_shift=1*mean_shift[0]
    y_shift=1*mean_shift[1]
    print "Adjusting: X:"+str(x_shift)+",Y:"+str(y_shift)

    transformation_matrix = cv2.estimateRigidTransform(new, old, False)
    transformation_matrix_avg+=(transformation_matrix-transformation_matrix_avg)/2.0   # running average of the transform - see the note after this listing

    stabilized_frame = cv2.warpAffine(frame,transformation_matrix_avg,(cols,rows),flags=cv2.INTER_NEAREST|cv2.WARP_INVERSE_MAP)
    cropped_stabilized_frame = stabilized_frame[border_crop:rows-border_crop, border_crop:cols-border_crop]

    if show_points:
        for point in new:
            corner_x=point[0]
            corner_y=point[1]
            frame = cv2.circle(frame,(corner_x,corner_y),2,(0,255,0),-1)

        for point in old:
            corner_x=point[0]
            corner_y=point[1]
            frame = cv2.circle(frame,(corner_x,corner_y),2,(255,255,255),-1)

    cv2.imshow('original frame',frame)
    cv2.imshow('stabilised frame',stabilized_frame)
    cv2.imshow('cropped stabilised frame',cropped_stabilized_frame)
    cv2.waitKey(inter_frame_delay)

    old_gray = new_gray.copy()
    points_to_track = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)

raw_input("Press Enter to continue...")
cv2.destroyAllWindows()
cap.release()
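
Two side notes on the lines this revision adds. The update transformation_matrix_avg += (transformation_matrix - transformation_matrix_avg)/2.0 is an exponential moving average that puts a weight of 0.5 on the newest transform, so the average still follows the motion quite closely; a smaller weight smooths harder. Also, cv2.estimateRigidTransform returns None when it cannot fit a transform (too few or degenerate points), so the arithmetic needs a guard. A sketch of both points, assuming the OpenCV 2.4/3.x API used here - alpha is my own naming:

import cv2

def update_average_transform(avg, new_pts, old_pts, alpha=0.1):
    # Exponential moving average of the 2x3 rigid transform
    m = cv2.estimateRigidTransform(new_pts, old_pts, False)
    if m is None:
        return avg                        # fit failed - keep the previous average
    return (1.0 - alpha)*avg + alpha*m    # smaller alpha -> smoother average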

revision 3

import numpy as np
import cv2
import sys

vid=sys.argv[1]
border_crop=10
show_points=True
inter_frame_delay=20
smoothing_window=100

rolling_trajectory_list=[]


cap = cv2.VideoCapture(vid)

# Parameters for Shi-Tomasi corner detection
feature_params = dict( maxCorners = 50,
                       qualityLevel = 0.3,
                       minDistance = 7,
                       blockSize = 7 )

# Parameters for Lucas-Kanade optical flow
lk_params = dict( winSize  = (15,15),
                  maxLevel = 4,
                  criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

# Take first frame and find corners in it
ret, old_frame = cap.read()
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
# Seed the running average with the identity (frame mapped to itself)
transformation_matrix_avg = cv2.estimateRigidTransform(old_frame, old_frame, False)


rows,cols = old_gray.shape
print "Video resolution: "+str(cols)+"*"+str(rows)
raw_input("Press Enter to continue...")

points_to_track = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)

print "Trackable points detected in first frame:"
print points_to_track

frame_mask = np.zeros_like(old_frame)

while(1):
    ret,frame = cap.read()
    if not ret:
        break

    #Convert the new frame to greyscale
    new_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    #calculate optical flow between the latest frame (new_gray) and the last one we examined
    print "Searching for optical flow between this frame and the last..."
    new_points, matched, err = cv2.calcOpticalFlowPyrLK(old_gray, new_gray, points_to_track, None, **lk_params)

    # Select good tracked points - matched==1 if the point has been found in the new frame
    new = new_points[matched==1]
    old = points_to_track[matched==1]

    print "Old point coordinates:"
    print old

    print "New point coordinates:"
    print new

    # This should return a transformation matrix mapping the points in "new" to "old"
    transformation_matrix = cv2.estimateRigidTransform(new, old, False)

print "Transform from new frame to old frame..."
    print transformation_matrix
    # Not sure about this...trying to create a smoothed average of the frame movement over the last X frames (see the note after this listing)
    rolling_trajectory_list.append(transformation_matrix)
    if len(rolling_trajectory_list) > smoothing_window:
        rolling_trajectory_list.pop(0)

    transformation_matrix_avg=sum(rolling_trajectory_list)/len(rolling_trajectory_list)

    print "Average transformation over last "+str(smoothing_window)+" frames:"
    print transformation_matrix_avg
    #Apply the transformation to the frame
    stabilized_frame = cv2.warpAffine(frame,transformation_matrix_avg,(cols,rows),flags=cv2.INTER_NEAREST|cv2.WARP_INVERSE_MAP)
    cropped_stabilized_frame = stabilized_frame[border_crop:rows-border_crop, border_crop:cols-border_crop]

    if show_points:
        for point in new:
            corner_x=int(point[0])
            corner_y=int(point[1])
            frame = cv2.circle(frame,(corner_x,corner_y),2,(0,255,0),-1)

        for point in old:
            corner_x=int(point[0])
            corner_y=int(point[1])
            frame = cv2.circle(frame,(corner_x,corner_y),2,(255,255,255),-1)

    cv2.imshow('original frame',frame)
    cv2.imshow('stabilised frame',stabilized_frame)
    cv2.imshow('cropped stabilised frame',cropped_stabilized_frame)
    cv2.waitKey(inter_frame_delay)

    old_gray = new_gray.copy()
    points_to_track = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)

raw_input("Press Enter to continue...")
cv2.destroyAllWindows()
cap.release()
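
For comparison with the rolling list above: because the 2x3 matrices average element-wise, a fixed-length collections.deque keeps the window without the manual pop(0), and a None check covers frames where the fit fails. Note also that cv2.estimateRigidTransform is deprecated; from OpenCV 3.2 onwards cv2.estimateAffinePartial2D does the same job and additionally returns an inlier mask. A sketch under those assumptions - the identity fallback is mine:

from collections import deque
import numpy as np
import cv2

window = deque(maxlen=100)                  # rolling window of 2x3 transforms

def smoothed_transform(new_pts, old_pts):
    m, inliers = cv2.estimateAffinePartial2D(new_pts, old_pts)   # OpenCV >= 3.2
    if m is not None:
        window.append(np.float32(m))
    if not window:
        return np.float32([[1,0,0],[0,1,0]])   # identity until the first good fit
    return sum(window)/len(window)             # element-wise mean over the window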