Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

GPU slower than CPU with Mali G72

Hi Guys,

I recently bought a Hikey 970 from 96boards to test GPU acceleration for my project. The board has a Mali G72 GPU, and I flashed a prebuilt system image provided on their forum. On the prebuilt system, I found the OpenCL library at /usr/lib/aarch64-linux-gnu/libOpenCL.so and the header files in /usr/include/aarch64-linux-gnu/CL. These are the options I used to build OpenCV:

-D WITH_OPENCL=OFF
-D HAVE_OPENCL_STATIC=ON
-D OPENCL_LIBRARIES=/usr/lib/aarch64-linux-gnu/libOpenCL.so
-D OPENCL_INCLUDE_DIRS=/usr/include/aarch64-linux-gnu/CL

My test code is below. The test image is 2560x1440 and the pipeline runs for 100 iterations. The CPU execution time is around 30ms per iteration and the GPU execution time is around 120ms per iteration.

I am wondering whether the library and header files I used are the correct ones for the Mali G72 GPU. Does anyone have an explanation for why the GPU is slower than the CPU? Thanks for your help. Théo

#include "opencv2/opencv.hpp"
#include "opencv2/core/ocl.hpp"
#include <iostream>
#include <stdio.h>

using namespace cv;
using namespace std;

int main(int argc, char** argv)
{
    ocl::setUseOpenCL(true);
    if (ocl::haveOpenCL())
    {
        cout << “OpenCL is available…” << endl;
        //return;
    }

    cv::ocl::Context context;
    if (!context.create(cv::ocl::Device::TYPE_GPU))
    {
        //cout << "Failed creating the context..." << endl;
        //return;
    } 
    cout << context.ndevices() << " GPU devices are detected." << endl;
    for (int i = 0; i < context.ndevices(); i++)
    {
        cv::ocl::Device device = context.device(i);
        cout << "name                 : " << device.name() << endl;
        cout << "available            : " << device.available() << endl;
        cout << "imageSupport         : " << device.imageSupport() << endl;
        cout << "OpenCL_C_Version     : " << device.OpenCL_C_Version() << endl;
        cout << endl;
    }

    UMat img, gray;
    imread("image_2560.jpg", IMREAD_COLOR).copyTo(img);
    //img = imread("image_2560.jpg", 1);
    int64 t=getTickCount();
    for(int i=0; i<100; i++)
    {   
        int64 t1=getTickCount();
        cvtColor(img, gray, COLOR_BGR2GRAY);
        GaussianBlur(gray, gray, Size(7, 7), 1.5);
        Canny(gray, gray, 0, 50);
        t1 = getTickCount() - t1;
        printf("Time elapsed t1: %fms\n", t1*1000/getTickFrequency());
    }
    t = getTickCount() - t;
    printf("Time elapsed t: %fms\n", t*1000/getTickFrequency());
    return 0;
}

}

GPU slower than CPU with Mali G72

Hi Guys,

I recently bought a Hikey 970 from 96boards to test GPU acceleration for my project. The board has a Mali G72 GPU, and I flashed a prebuilt system image provided on their forum. On the prebuilt system, I found the OpenCL library at /usr/lib/aarch64-linux-gnu/libOpenCL.so and the header files in /usr/include/aarch64-linux-gnu/CL. These are the options I used to build OpenCV:

-D WITH_OPENCL=OFF
-D HAVE_OPENCL_STATIC=ON
-D OPENCL_LIBRARIES=/usr/lib/aarch64-linux-gnu/libOpenCL.so
-D OPENCL_INCLUDE_DIRS=/usr/include/aarch64-linux-gnu/CL

My test code is below. The test image is 2560x1440 and the pipeline runs for 100 iterations. The CPU execution time is around 30ms per iteration and the GPU execution time is around 120ms per iteration.

I am wondering whether the library and header files I used are the correct ones for the Mali G72 GPU. Does anyone have an explanation for why the GPU is slower than the CPU? Thanks for your help. Théo

#include "opencv2/opencv.hpp"
#include "opencv2/core/ocl.hpp"
#include <iostream>
#include <stdio.h>

using namespace cv;
using namespace std;

int main(int argc, char** argv)
{
    ocl::setUseOpenCL(true);
    if (ocl::haveOpenCL())
    {
        cout << “OpenCL is available…” << endl;
        //return;
    }

    cv::ocl::Context context;
    if (!context.create(cv::ocl::Device::TYPE_GPU))
    {
        //cout << "Failed creating the context..." << endl;
        //return;
    } 
    cout << context.ndevices() << " GPU devices are detected." << endl;
    for (int i = 0; i < context.ndevices(); i++)
    {
        cv::ocl::Device device = context.device(i);
        cout << "name                 : " << device.name() << endl;
        cout << "available            : " << device.available() << endl;
        cout << "imageSupport         : " << device.imageSupport() << endl;
        cout << "OpenCL_C_Version     : " << device.OpenCL_C_Version() << endl;
        cout << endl;
    }

    UMat img, gray;
    imread("image_2560.jpg", IMREAD_COLOR).copyTo(img);
    //img = imread("image_2560.jpg", 1);
    int64 t=getTickCount();
    for(int i=0; i<100; i++)
    {   
        int64 t1=getTickCount();
        cvtColor(img, gray, COLOR_BGR2GRAY);
        GaussianBlur(gray, gray, Size(7, 7), 1.5);
        Canny(gray, gray, 0, 50);
        t1 = getTickCount() - t1;
        printf("Time elapsed t1: %fms\n", t1*1000/getTickFrequency());
    }
    t = getTickCount() - t;
    printf("Time elapsed t: %fms\n", t*1000/getTickFrequency());
    return 0;
}

}

GPU slower than CPU with Mali G72

Hi Guys,

I recently bought a Hikey 970 from 96boards to test GPU acceleration for my project. The board has a Mali G72 GPU, and I flashed a prebuilt system image provided on their forum. On the prebuilt system, I found the OpenCL library at /usr/lib/aarch64-linux-gnu/libOpenCL.so and the header files in /usr/include/aarch64-linux-gnu/CL. These are the options I used to build OpenCV 3.4.3:

-D WITH_OPENCL=OFF
-D HAVE_OPENCL_STATIC=ON
-D OPENCL_LIBRARIES=/usr/lib/aarch64-linux-gnu/libOpenCL.so
-D OPENCL_INCLUDE_DIRS=/usr/include/aarch64-linux-gnu/CL

My test code is below. Tested image is 2560x1440 and 100 iterations. The cpu execution time is around 30ms per iteration and the gpu is around 120ms per iteration.

I am wondering whether the library and header files I used are the correct ones for the Mali G72 GPU. Does anyone have an explanation for why the GPU is slower than the CPU? Thanks for your help. Théo

#include "opencv2/opencv.hpp"
#include "opencv2/core/ocl.hpp"
#include <iostream>
#include <stdio.h>

using namespace cv;
using namespace std;

int main(int argc, char** argv)
{
    ocl::setUseOpenCL(true);
    if (ocl::haveOpenCL())
    {
        cout << “OpenCL is available…” << endl;
        //return;
    }

    cv::ocl::Context context;
    if (!context.create(cv::ocl::Device::TYPE_GPU))
    {
        //cout << "Failed creating the context..." << endl;
        //return;
    } 
    cout << context.ndevices() << " GPU devices are detected." << endl;
    for (int i = 0; i < context.ndevices(); i++)
    {
        cv::ocl::Device device = context.device(i);
        cout << "name                 : " << device.name() << endl;
        cout << "available            : " << device.available() << endl;
        cout << "imageSupport         : " << device.imageSupport() << endl;
        cout << "OpenCL_C_Version     : " << device.OpenCL_C_Version() << endl;
        cout << endl;
    }

    UMat img, gray;
    imread("image_2560.jpg", IMREAD_COLOR).copyTo(img);
    //img = imread("image_2560.jpg", 1);
    int64 t=getTickCount();
    for(int i=0; i<100; i++)
    {   
        int64 t1=getTickCount();
        cvtColor(img, gray, COLOR_BGR2GRAY);
        GaussianBlur(gray, gray, Size(7, 7), 1.5);
        Canny(gray, gray, 0, 50);
        t1 = getTickCount() - t1;
        printf("Time elapsed t1: %fms\n", t1*1000/getTickFrequency());
    }
    t = getTickCount() - t;
    printf("Time elapsed t: %fms\n", t*1000/getTickFrequency());
    return 0;
}

}

GPU slower than CPU with Mali G72

Hi Guys,

I recently bought a Hikey 970 from 96boards to test GPU acceleration for my project. The board has a Mali G72 GPU, and I flashed a prebuilt system image provided on their forum. On the prebuilt system, I found the OpenCL library at /usr/lib/aarch64-linux-gnu/libOpenCL.so and the header files in /usr/include/aarch64-linux-gnu/CL. These are the options I used to build OpenCV 3.4.3:

-D WITH_OPENCL=OFF
-D HAVE_OPENCL_STATIC=ON
-D OPENCL_LIBRARIES=/usr/lib/aarch64-linux-gnu/libOpenCL.so
-D OPENCL_INCLUDE_DIRS=/usr/include/aarch64-linux-gnu/CL

My test code is below. Tested image is 2560x1440 and 100 iterations. The cpu execution time is around 30ms per iteration and the gpu is around 120ms per iteration.

I am wondering whether the library and header files I used are the correct ones for the Mali G72 GPU. Does anyone have an explanation for why the GPU is slower than the CPU?

Thanks for help. Théo

#include "opencv2/opencv.hpp"
#include "opencv2/core/ocl.hpp"
#include <iostream>
#include <stdio.h>

using namespace cv;
using namespace std;

int main(int argc, char** argv)
{
    ocl::setUseOpenCL(true);
    if (ocl::haveOpenCL())
    {
        cout << “OpenCL is available…” << endl;
        //return;
    }

    cv::ocl::Context context;
    if (!context.create(cv::ocl::Device::TYPE_GPU))
    {
        //cout << "Failed creating the context..." << endl;
        //return;
    } 
    cout << context.ndevices() << " GPU devices are detected." << endl;
    for (int i = 0; i < context.ndevices(); i++)
    {
        cv::ocl::Device device = context.device(i);
        cout << "name                 : " << device.name() << endl;
        cout << "available            : " << device.available() << endl;
        cout << "imageSupport         : " << device.imageSupport() << endl;
        cout << "OpenCL_C_Version     : " << device.OpenCL_C_Version() << endl;
        cout << endl;
    }

    UMat img, gray;
    imread("image_2560.jpg", IMREAD_COLOR).copyTo(img);
    //img = imread("image_2560.jpg", 1);
    int64 t=getTickCount();
    for(int i=0; i<100; i++)
    {   
        int64 t1=getTickCount();
        cvtColor(img, gray, COLOR_BGR2GRAY);
        GaussianBlur(gray, gray, Size(7, 7), 1.5);
        Canny(gray, gray, 0, 50);
        t1 = getTickCount() - t1;
        printf("Time elapsed t1: %fms\n", t1*1000/getTickFrequency());
    }
    t = getTickCount() - t;
    printf("Time elapsed t: %fms\n", t*1000/getTickFrequency());
    return 0;
}

}