Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

Thanks for the responses.

cartToPolar only calculates the angle to an accuracy of 0.3 degrees, which is no good for me.

However, I solved this by implementing my own atan2(x,y) function [note the switched argument order!] using OpenCL-accelerated OpenCV. There are one or two unnecessary copies, so it needs some tidying, but it's fast and produces the same result as std::atan2. It also risks a divide-by-zero, so I need to implement a mask to prevent this (unless there is some sort of default protection against division by zero in OpenCV's divide()?). It's based on NVIDIA's reference implementation of atan2, but I'm still curious to know whether there is a cleaner way to do this by calling the native OpenCL atan2(y,x) directly and feeding in the handles to the UMats.

Anyway, here is my messy, but working, solution:

   // Element-wise approximation of atan2(y_in, x_in), in radians, built only
   // from OpenCV array operations so that it can run on UMat (T-API) data.
   //
   // NOTE: the argument order is (x, y), i.e. swapped relative to std::atan2(y, x).
   //
   // The formula is a polynomial (Horner form) approximation of atan on
   // [0, 1], applied to min(|x|,|y|) / max(|x|,|y|) and then unfolded into the
   // correct quadrant. It follows NVIDIA's reference atan2 implementation; the
   // original author reports agreement with the CPU atan2 to about 0.0002 degrees.
   //
   // Parameters:
   //   x_in  - x coordinates (assumed single-channel floating point, e.g.
   //           CV_32FC1 -- TODO confirm against callers).
   //   y_in  - y coordinates, same size and type as x_in.
   //   mask  - currently unused; kept so existing callers keep compiling.
   //   t3    - output angles in radians; same size and type as the inputs.
   //           Where x == 0 and y == 0 the result is 0.
   //
   // All values derived from x_in / y_in (including the sign masks) are computed
   // before t3 is written, so t3 may refer to the same buffer as an input.
   void gpu_atan2(UMat& x_in, UMat& y_in, UMat& mask, UMat& t3)
   {
       (void)mask; // Not used by the computation (see parameter notes above).

       const double kHalfPi = 1.570796327;
       const double kPi     = 3.141592654;

       // Horner coefficients, highest order first.
       const double kLeadCoeff = -0.013480470;
       const double kCoeffs[]  = { 0.057477314, -0.121239071, 0.195635925,
                                  -0.332994597,  0.999995630 };

       // |x| and |y| via absdiff against zero.
       UMat abs_x, abs_y;
       cv::absdiff(x_in, cv::Scalar(0.0), abs_x);
       cv::absdiff(y_in, cv::Scalar(0.0), abs_y);

       UMat hi, lo;
       cv::max(abs_x, abs_y, hi);
       cv::min(abs_x, abs_y, lo);

       // Every mask is derived from the inputs up front (255 where the condition
       // holds), before anything is written to the output.
       UMat origin_mask, steep_mask, x_neg_mask, y_neg_mask;
       cv::compare(hi,    cv::Scalar(0.0), origin_mask, cv::CMP_EQ); // x == 0 && y == 0
       cv::compare(abs_y, abs_x,           steep_mask,  cv::CMP_GT); // |y| > |x|
       cv::compare(x_in,  cv::Scalar(0.0), x_neg_mask,  cv::CMP_LT); // x < 0
       cv::compare(y_in,  cv::Scalar(0.0), y_neg_mask,  cv::CMP_LT); // y < 0

       // ratio = min / max, in [0, 1]. Where max == 0 the quotient is 0/0; what
       // cv::divide produces there for floating-point data depends on the
       // OpenCV version, so those elements are forced to 0 explicitly.
       UMat ratio;
       cv::divide(lo, hi, ratio);
       ratio.setTo(cv::Scalar(0.0), origin_mask);

       UMat ratio_sq;
       cv::multiply(ratio, ratio, ratio_sq);

       // Horner evaluation in ratio^2, starting from the leading coefficient.
       UMat poly(ratio_sq.size(), ratio_sq.type(), cv::Scalar(kLeadCoeff));
       UMat scratch;
       for (const double coeff : kCoeffs)
       {
           cv::multiply(poly, ratio_sq, scratch);
           cv::add(scratch, cv::Scalar(coeff), poly);
       }

       // atan(ratio), in [0, pi/4].
       cv::multiply(poly, ratio, t3);

       // cv::multiply / cv::subtract results cannot be written through a mask
       // into partially preserved data here, so each adjustment is computed for
       // all elements and then copied over only the selected ones.
       UMat adjusted;

       // |y| > |x|: angle = pi/2 - angle.
       cv::subtract(cv::Scalar(kHalfPi), t3, adjusted);
       adjusted.copyTo(t3, steep_mask);

       // x < 0: angle = pi - angle.
       cv::subtract(cv::Scalar(kPi), t3, adjusted);
       adjusted.copyTo(t3, x_neg_mask);

       // y < 0: angle = -angle.
       cv::multiply(t3, cv::Scalar(-1.0), adjusted);
       adjusted.copyTo(t3, y_neg_mask);
   }