Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

Hi,

To use OpenCL, in addition to using cv::UMat, I do the following:

  • cv::ocl::setUseOpenCL(true);
  • I add an environment variable to select the correct GPU device (see the documentation), because I have both an integrated GPU (Intel HD Graphics) and a dedicated GPU. Variable name: OPENCV_OPENCL_DEVICE ; variable value: :GPU:1

Some tests I did for CascadeClassifier::detectMultiScale() using OpenCV-3.0.0-rc1, Windows 7 x64, VS2010 in release mode, image size=1280x720, results on an average of 1000 images:

Only the CPU (Intel Core i7): 12.46 FPS, CPU load: 65%
OpenCL + Intel HD Graphics: 7 FPS, CPU load: 8%, GPU load: 78%, (x0.56)
OpenCL + GPU (nVidia): 13 FPS, CPU load: 25%, GPU load: 70%, (x1.04)
CUDA + GPU: 30 FPS, CPU load: 12%, GPU load: 60%, (x2.4)

On my computer, the gain for OpenCL + GPU is negligible compared to using only the CPU. However, with CUDA + GPU the speed-up is about x2. I didn't check whether the results are the same for all the versions of detectMultiScale.

The code I used for my tests, feel free to add your results to disprove/confirm my results:

#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>
#include <opencv2/core/ocl.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudaobjdetect.hpp>
#include <opencv2/cudaimgproc.hpp>


int main(int argc, char**argv) {
    std::cout << "OpenCV version=" << std::hex << CV_VERSION << std::dec << std::endl;

    cv::Mat frame;
    cv::UMat uframe, uFrameGray;
    cv::cuda::GpuMat image_gpu, image_gpu_gray;
    cv::VideoCapture capture("path_to_the_video");

    bool useOpenCL = (argc >= 2) ? atoi(argv[1]) : false;
    std::cout << "Use OpenCL=" << useOpenCL << std::endl;
    cv::ocl::setUseOpenCL(useOpenCL);

    bool useCuda = (argc >= 3) ? atoi(argv[2]) : false;
    std::cout << "Use CUDA=" << useCuda << std::endl;

    cv::Ptr<cv::CascadeClassifier> cascade = cv::makePtr<cv::CascadeClassifier>("data/lbpcascades/lbpcascade_frontalface.xml");
    cv::Ptr<cv::cuda::CascadeClassifier> cascade_gpu = cv::cuda::CascadeClassifier::create("data/lbpcascades/lbpcascade_frontalface.xml");

    double time = 0.0;
    int nb = 0;
    if(capture.isOpened()) {
        for(;;) {
            capture >> frame;
            if(frame.empty() || nb >= 1000) {
                break;
            }

            std::vector<cv::Rect> faces;
            double t = 0.0;
            if(!useCuda) {
                t = (double) cv::getTickCount();
                frame.copyTo(uframe);
                cv::cvtColor(uframe, uFrameGray, CV_BGR2GRAY);
                cascade->detectMultiScale(uFrameGray, faces);
                t = ((double) cv::getTickCount() - t) / cv::getTickFrequency();
            } else {
                t = (double) cv::getTickCount();
                image_gpu.upload(frame);
                cv::cuda::cvtColor(image_gpu, image_gpu_gray, CV_BGR2GRAY);
                cv::cuda::GpuMat objbuf;
                cascade_gpu->detectMultiScale(image_gpu_gray, objbuf);
                cascade_gpu->convert(objbuf, faces);
                t = ((double) cv::getTickCount() - t) / cv::getTickFrequency();
            }

            time += t;
            nb++;

            for(std::vector<cv::Rect>::const_iterator it = faces.begin(); it != faces.end(); ++it) {
                cv::rectangle(frame, *it, cv::Scalar(0,0,255));
            }
            std::stringstream ss;
            ss << "FPS=" << (nb / time);
            cv::putText(frame, ss.str(), cv::Point(30, 30), cv::FONT_HERSHEY_SIMPLEX, 1.0, cv::Scalar(0,0,255));

            cv::imshow("Frame", frame);
            char c = cv::waitKey(30);
            if(c == 27) {
                break;
            }
        }
    }

    std::cout << "Mean time=" << (time / nb) << " s" << " ; Mean FPS=" << (nb / time) << " ; nb=" << nb << std::endl;
    system("pause");
    return 0;
}