1 | initial version |
Hi,
To use OpenCL, in addition to cv::UMat, I use:
cv::ocl::setUseOpenCL(true);
the environment variable OPENCV_OPENCL_DEVICE; value of the variable: :GPU:1
Some tests I did for CascadeClassifier::detectMultiScale()
using OpenCV-3.0.0-rc1, Windows 7 x64, VS2010 in release mode, image size=1280x720, results averaged over 1000 images:
Only the CPU (Intel Core i7): 12.46 FPS, CPU load: 65%
OpenCL + Intel HD Graphics: 7 FPS, CPU load: 8%, GPU load: 78%, (x0.56)
OpenCL + GPU (nVidia): 13 FPS, CPU load: 25%, GPU load: 70%, (x1.04)
CUDA + GPU: 30 FPS, CPU load: 12%, GPU load: 60%, (x2.4)
On my computer, the gain for OpenCL + GPU is negligible compared to using only the CPU. However, with CUDA + GPU the speed-up is about x2. I didn't check whether the results are the same for all versions of detectMultiScale.
Here is the code I used for my tests; feel free to add your own results to confirm or disprove mine:
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <vector>
#include <opencv2/opencv.hpp>
#include <opencv2/core/ocl.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudaobjdetect.hpp>
#include <opencv2/cudaimgproc.hpp>
int main(int argc, char**argv) {
std::cout << "OpenCV version=" << std::hex << CV_VERSION << std::dec << std::endl;
cv::Mat frame;
cv::UMat uframe, uFrameGray;
cv::cuda::GpuMat image_gpu, image_gpu_gray;
cv::VideoCapture capture("path_to_the_video");
bool useOpenCL = (argc >= 2) ? atoi(argv[1]) : false;
std::cout << "Use OpenCL=" << useOpenCL << std::endl;
cv::ocl::setUseOpenCL(useOpenCL);
bool useCuda = (argc >= 3) ? atoi(argv[2]) : false;
std::cout << "Use CUDA=" << useCuda << std::endl;
cv::Ptr<cv::CascadeClassifier> cascade = cv::makePtr<cv::CascadeClassifier>("data/lbpcascades/lbpcascade_frontalface.xml");
cv::Ptr<cv::cuda::CascadeClassifier> cascade_gpu = cv::cuda::CascadeClassifier::create("data/lbpcascades/lbpcascade_frontalface.xml");
double time = 0.0;
int nb = 0;
if(capture.isOpened()) {
for(;;) {
capture >> frame;
if(frame.empty() || nb >= 1000) {
break;
}
std::vector<cv::Rect> faces;
double t = 0.0;
if(!useCuda) {
t = (double) cv::getTickCount();
frame.copyTo(uframe);
cv::cvtColor(uframe, uFrameGray, CV_BGR2GRAY);
cascade->detectMultiScale(uFrameGray, faces);
t = ((double) cv::getTickCount() - t) / cv::getTickFrequency();
} else {
t = (double) cv::getTickCount();
image_gpu.upload(frame);
cv::cuda::cvtColor(image_gpu, image_gpu_gray, CV_BGR2GRAY);
cv::cuda::GpuMat objbuf;
cascade_gpu->detectMultiScale(image_gpu_gray, objbuf);
cascade_gpu->convert(objbuf, faces);
t = ((double) cv::getTickCount() - t) / cv::getTickFrequency();
}
time += t;
nb++;
for(std::vector<cv::Rect>::const_iterator it = faces.begin(); it != faces.end(); ++it) {
cv::rectangle(frame, *it, cv::Scalar(0,0,255));
}
std::stringstream ss;
ss << "FPS=" << (nb / time);
cv::putText(frame, ss.str(), cv::Point(30, 30), cv::FONT_HERSHEY_SIMPLEX, 1.0, cv::Scalar(0,0,255));
cv::imshow("Frame", frame);
char c = cv::waitKey(30);
if(c == 27) {
break;
}
}
}
std::cout << "Mean time=" << (time / nb) << " s" << " ; Mean FPS=" << (nb / time) << " ; nb=" << nb << std::endl;
system("pause");
return 0;
}