Slow performance on iOS with video stabilization

Hi all,

I hope someone can help me understand why processing takes so long on an iPhone 6. I'm currently doing video stabilization using the following:

+ (void)stabilizeVideoFileAtURL:(NSURL *)inputURL writeToURL:(NSURL *)outputURL {
  dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0), ^{
    String inputFile(inputURL.path.UTF8String);
    cout << "Input: " << inputFile << endl;

    String outputFile(outputURL.path.UTF8String);
    cout << "Output: " << outputFile << endl;

    VideoCapture cap(inputFile);
    assert(cap.isOpened());

    Mat cur, cur_grey, cur_orig;
    Mat prev, prev_grey, prev_orig;

    cap >> prev;
    cvtColor(prev, prev_grey, COLOR_BGR2GRAY);

    // Step 1 - Get previous to current frame transformation (dx, dy, da) for all frames
    vector<TransformParam> prev_to_cur_transform;

    int frames = 1;
    int max_frames = cap.get(CV_CAP_PROP_FRAME_COUNT);
    cout << "Max Frames: " << max_frames << endl;

    Mat last_T;

    while (true) {
      cap >> cur;

      if (cur.data == NULL) {
        cout << "Current data is NULL, breaking out...1" << endl;
        break;
      }

      cvtColor(cur, cur_grey, COLOR_BGR2GRAY);

      // Vector from prev to cur
      vector<Point2f> prev_corner, cur_corner;
      vector<Point2f> prev_corner2, cur_corner2;
      vector<uchar> status;
      vector<float> error;

      goodFeaturesToTrack(prev_grey, prev_corner, 200, 0.01, 30);
      calcOpticalFlowPyrLK(prev_grey, cur_grey, prev_corner, cur_corner, status, error);

      // Weed out bad matches
      for (size_t i = 0; i < status.size(); i++) {
        if (status[i]) {
          prev_corner2.push_back(prev_corner[i]);
          cur_corner2.push_back(cur_corner[i]);
        }
      }

      // Translation + Rotation only
      Mat T = estimateRigidTransform(prev_corner2, cur_corner2, false); // use only the points that tracked successfully

      // Reuse the last good transform; fall back to identity on the first frame
      if (T.data == NULL) {
        cout << "No Transform was found" << endl;
        if (last_T.empty())
          last_T = Mat::eye(2, 3, CV_64F);
        last_T.copyTo(T);
      }

      T.copyTo(last_T);

      // Decompose T
      double dx = T.at<double>(0,2);
      double dy = T.at<double>(1,2);
      double da = atan2(T.at<double>(1,0), T.at<double>(0,0));

      prev_to_cur_transform.push_back(TransformParam(dx, dy, da));

      cur.copyTo(prev);
      cur_grey.copyTo(prev_grey);

      frames++;
    }

    // Step 2 - Accumulate the transformations to get the image trajectory
    // Accumulated frame to frame transform
    double x = 0;
    double y = 0;
    double a = 0;

    vector<Trajectory> trajectory; // Trajectory at all frames

    for (size_t i = 0; i < prev_to_cur_transform.size(); i++) {
      x += prev_to_cur_transform[i].dx;
      y += prev_to_cur_transform[i].dy;
      a += prev_to_cur_transform[i].da;

      trajectory.push_back(Trajectory(x, y, a));
    }

    // Step 3 - Smooth out the trajectory using an averaging window
    vector<Trajectory> smoothed_trajectory; // Trajectory at all frames

    for (size_t i = 0; i < trajectory.size(); i++) {
      double sum_x = 0;
      double sum_y = 0;
      double sum_a = 0;
      int count = 0;

      for (int j = -SMOOTHING_RADIUS; j <= SMOOTHING_RADIUS; j++) {
        if ((int)i + j >= 0 && (int)i + j < (int)trajectory.size()) {
          sum_x += trajectory[i+j].x;
          sum_y += trajectory[i+j].y;
          sum_a += trajectory[i+j].a;

          count++;
        }
      }

      double avg_x = sum_x / count;
      double avg_y = sum_y / count;
      double avg_a = sum_a / count;

      smoothed_trajectory.push_back(Trajectory(avg_x, avg_y, avg_a));
    }

    // Step 4 - Generate new set of previous to current transform, such that the trajectory ends up being the same as the smoothed trajectory
    vector<TransformParam> new_prev_to_cur_transform;

    // Accumulated frame to frame transform
    x = 0;
    y = 0;
    a = 0;

    for (size_t i = 0; i < prev_to_cur_transform.size(); i++) {
      x += prev_to_cur_transform[i].dx;
      y += prev_to_cur_transform[i].dy;
      a += prev_to_cur_transform[i].da;

      // Target - Current
      double diff_x = smoothed_trajectory[i].x - x;
      double diff_y = smoothed_trajectory[i].y - y;
      double diff_a = smoothed_trajectory[i].a - a;

      double dx = prev_to_cur_transform[i].dx + diff_x;
      double dy = prev_to_cur_transform[i].dy + diff_y;
      double da = prev_to_cur_transform[i].da + diff_a;

      new_prev_to_cur_transform.push_back(TransformParam(dx, dy, da));
    }

    // Step 5 - Apply the new transformation to the video
    cap.set(CV_CAP_PROP_POS_FRAMES, 0);


    double width = prev.size().width;
    double height = prev.size().height;
    Mat T(2, 3, CV_64F);

    int vert_border = HORIZONTAL_BORDER_CROP * prev.rows / prev.cols;

    int ex = static_cast<int>(cap.get(CV_CAP_PROP_FOURCC)); // Get Codec type

    VideoWriter writer(outputFile, ex, 30, cv::Size(width, height), true);

    if (!writer.isOpened()) {
      cout << "Could not open file for writing" << endl;
      return;
    }

    int k = 0;
    cap.release();

    VideoCapture cap2(inputFile);
    assert(cap2.isOpened());

    // Don't process the very last frame, no valid transform
    while (k < frames - 1) {
      cap2 >> cur;

      if (cur.data == NULL) {
        cout << "Current data is NULL, breaking out...2" << endl;
        break;
      }

      T.at<double>(0,0) = cos(new_prev_to_cur_transform[k].da);
      T.at<double>(0,1) = -sin(new_prev_to_cur_transform[k].da);
      T.at<double>(1,0) = sin(new_prev_to_cur_transform[k].da);
      T.at<double>(1,1) = cos(new_prev_to_cur_transform[k].da);

      T.at<double>(0,2) = new_prev_to_cur_transform[k].dx;
      T.at<double>(1,2) = new_prev_to_cur_transform[k].dy;

      Mat cur2;

      warpAffine(cur, cur2, T, cur.size());

      cur2 = cur2(Range(vert_border, cur2.rows-vert_border), Range(HORIZONTAL_BORDER_CROP, cur2.cols - HORIZONTAL_BORDER_CROP));

      // Resize cur2 back to cur size, for better side by side comparison
      resize(cur2, cur2, cur.size());

      double diffx = width * 0.2;
      double diffy = height * 0.2;

      cv::Rect myROI((diffx / 2), (diffy / 2), width - (diffx), height - (diffy));

      Mat fin = cur2(myROI);
      resize(fin, fin, cur2.size());

      writer.write(fin);

      k++;
    }

    writer.release();
    cout << "Video stabilization complete" << endl;
  });
}
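
To narrow down where the time goes, my plan is to wrap each step with std::chrono timers (this is Objective-C++, so that should be available), something along these lines:

#include <chrono>

auto t0 = std::chrono::steady_clock::now();

// ... Step 1 loop here ...

auto t1 = std::chrono::steady_clock::now();
auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();
cout << "Step 1 took " << ms << " ms" << endl;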

This currently takes anywhere from 3 to 4 minutes to complete on an iPhone 6. Why is it so slow, and is there a better option for video stabilization?
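
One idea I've been considering (not sure whether it's sound) is to run the Step 1 feature tracking on downscaled grey frames and scale the recovered translation back up, since goodFeaturesToTrack and calcOpticalFlowPyrLK currently run on every full-resolution frame. A rough, untested sketch, reusing prev_grey and cur_grey from the listing above:

// Untested: estimate motion at half resolution, then rescale the translation.
const double scale = 0.5;

Mat prev_small, cur_small;
resize(prev_grey, prev_small, cv::Size(), scale, scale, INTER_AREA);
resize(cur_grey, cur_small, cv::Size(), scale, scale, INTER_AREA);

vector<Point2f> prev_pts, cur_pts;
vector<uchar> st;
vector<float> err;

goodFeaturesToTrack(prev_small, prev_pts, 200, 0.01, 30 * scale);
calcOpticalFlowPyrLK(prev_small, cur_small, prev_pts, cur_pts, st, err);

Mat T_small = estimateRigidTransform(prev_pts, cur_pts, false);
if (!T_small.empty()) {
  // Rotation is scale-invariant; only the translation needs rescaling.
  double dx = T_small.at<double>(0,2) / scale;
  double dy = T_small.at<double>(1,2) / scale;
  double da = atan2(T_small.at<double>(1,0), T_small.at<double>(0,0));
  // ... push TransformParam(dx, dy, da) as before ...
}

Would that be a reasonable direction, or is there something in OpenCV (the videostab module, maybe?) that is better suited to this on iOS?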