Hi all,

I hope someone can help me understand why this processing takes so long on an iPhone 6. I'm currently doing video stabilization with OpenCV, using the following method:

+ (void)stabilizeVideoFileAtURL:(NSURL *)inputURL writeToURL:(NSURL *)outputURL {
  dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0), ^{
    String inputFile = *new String(inputURL.path.UTF8String);
    cout << "Input: " << inputFile << endl;

    String outputFile = *new String(outputURL.path.UTF8String);
    cout << "Output: " << outputFile << endl;

    VideoCapture cap(inputFile);

    Mat cur, cur_grey, cur_orig;
    Mat prev, prev_grey, prev_orig;

    cap >> prev;
    cvtColor(prev, prev_grey, COLOR_BGR2GRAY);

    // Step 1 - Get previous to current frame transformation (dx, dy, da) for all frames
    vector<TransformParam> prev_to_cur_transform;

    int frames = 1;
    int max_frames = cap.get(CV_CAP_PROP_FRAME_COUNT);
    cout << "Max Frames: " << max_frames << endl;

    Mat last_T;

    while (true) {
      cap >> cur;

      if ( == NULL) {
        cout << "Current data is NULL, breaking out...1" << endl;

      cvtColor(cur, cur_grey, COLOR_BGR2GRAY);

      // Vector from prev to cur
      vector<Point2f> prev_corner, cur_corner;
      vector<Point2f> prev_corner2, cur_corner2;
      vector<uchar> status;
      vector<float> error;

      goodFeaturesToTrack(prev_grey, prev_corner, 200, 0.01, 30);
      calcOpticalFlowPyrLK(prev_grey, cur_grey, prev_corner, cur_corner, status, error);

      // Weed out bad matches
      for (size_t i = 0; i < status.size(); i++) {
        if (status[i]) {

      // Translation + Rotation only
      Mat T = estimateRigidTransform(prev_corner, cur_corner, false);

      if ( == NULL) {
        cout << "No Transform was found" << endl;


      // Decompose T
      double dx =<double>(0,2);
      double dy =<double>(1,2);
      double da = atan2(<double>(1,0),<double>(0,0));

      prev_to_cur_transform.push_back(TransformParam(dx, dy, da));



    // Step 2 - Accumulate the transformations to get the image trajectory
    // Accumulated frame to frame transform
    double x = 0;
    double y = 0;
    double a = 0;

    vector<Trajectory> trajectory; // Trajectory at all frames

    for (size_t i = 0; i < prev_to_cur_transform.size(); i++) {
      x += prev_to_cur_transform[i].dx;
      y += prev_to_cur_transform[i].dy;
      a += prev_to_cur_transform[i].da;

      trajectory.push_back(Trajectory(x, y, a));

    // Step 3 - Smooth out the trajectory using an averaging window
    vector<Trajectory> smoothed_trajectory; // Trajectory at all frames

    for (size_t i = 0; i < trajectory.size(); i++) {
      double sum_x = 0;
      double sum_y = 0;
      double sum_a = 0;
      int count = 0;

      for (int j = -SMOOTHING_RADIUS; j <= SMOOTHING_RADIUS; j++) {
        if (i + j >= 0 && i + j < trajectory.size()) {
          sum_x += trajectory[i+j].x;
          sum_y += trajectory[i+j].y;
          sum_a += trajectory[i+j].a;


      double avg_x = sum_x / count;
      double avg_y = sum_y / count;
      double avg_a = sum_a / count;

      smoothed_trajectory.push_back(Trajectory(avg_x, avg_y, avg_a));

    // Step 4 - Generate new set of previous to current transform, such that the trajectory ends up being the same as the smoothed trajectory
    vector<TransformParam> new_prev_to_cur_transform;

    // Accumulated frame to frame transform
    x = 0;
    y = 0;
    a = 0;

    for (size_t i = 0; i < prev_to_cur_transform.size(); i++) {
      x += prev_to_cur_transform[i].dx;
      y += prev_to_cur_transform[i].dy;
      a += prev_to_cur_transform[i].da;

      // Target - Current
      double diff_x = smoothed_trajectory[i].x - x;
      double diff_y = smoothed_trajectory[i].y - y;
      double diff_a = smoothed_trajectory[i].a - a;

      double dx = prev_to_cur_transform[i].dx + diff_x;
      double dy = prev_to_cur_transform[i].dy + diff_y;
      double da = prev_to_cur_transform[i].da + diff_a;

      new_prev_to_cur_transform ...
