Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

Making Object Detection Faster

Hello, I am currently trying out the deep neural network (dnn) module in OpenCV 3.3.0.

I am currently trying out object detection with dnn.

However, my code seems to run at about 1 frame every 10 seconds (literally)!

Can someone please tell me whether this is just my slow computer, or whether my code is poorly written? Thanks in advance.

Here is my code (By the way, my computer has 4GB of RAM):

#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include <iostream>
#include <algorithm>

using namespace std;
using namespace cv;
using namespace cv::dnn;

// Human-readable labels, indexed by the class id that main() reads from
// column 1 of each detection row. Index 0 is the "background" entry.
const char* classNames[] = {
    "background",   //  0
    "aeroplane",    //  1
    "bicycle",      //  2
    "bird",         //  3
    "boat",         //  4
    "bottle",       //  5
    "bus",          //  6
    "car",          //  7
    "cat",          //  8
    "chair",        //  9
    "cow",          // 10
    "diningtable",  // 11
    "dog",          // 12
    "horse",        // 13
    "motorbike",    // 14
    "person",       // 15
    "pottedplant",  // 16
    "sheep",        // 17
    "sofa",         // 18
    "train",        // 19
    "tvmonitor"     // 20
};

int main()
{
    dnn::Net net = readNetFromCaffe("deploy.prototxt", "VGG_VOC0712_SSD_300x300_ft_iter_120000.caffemodel");

    VideoCapture cap(0);

    while (true)
    {
        Mat frame;
        cap >> frame;

        if (frame.empty())
        {
            waitKey();
            break;
        }

        if (frame.channels() == 4)
        {
            cvtColor(frame, frame, COLOR_BGRA2BGR);
        }

        Mat inputBlob = blobFromImage(frame, 1.0f, Size(300, 300), Scalar(104, 117, 123), false);                                                                            //! [Set input blob]
        net.setInput(inputBlob, "data");
        Mat detection = net.forward("detection_out");

        Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

        float confidenceThreshold = 0.5;
        for (int i = 0; i < detectionMat.rows; i++)
        {
            float confidence = detectionMat.at<float>(i, 2);

            if (confidence > confidenceThreshold)
            {
                size_t objectClass = (size_t)(detectionMat.at<float>(i, 1));

                int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols);
                int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows);
                int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols);
                int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows);

                ostringstream ss;
                ss.str("");
                ss << confidence;
                String conf(ss.str());

                Rect object(xLeftBottom, yLeftBottom,
                    xRightTop - xLeftBottom,
                    yRightTop - yLeftBottom);

                rectangle(frame, object, Scalar(0, 255, 0));
                String label = String(classNames[objectClass]) + ": " + conf;
                int baseLine = 0;
                Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
                rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height),
                    Size(labelSize.width, labelSize.height + baseLine)),
                    Scalar(255, 255, 255), CV_FILLED);
                putText(frame, label, Point(xLeftBottom, yLeftBottom),
                    FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
            }
        }

        imshow("detections", frame);
        if (waitKey(1) >= 0) break;
    }

    return 0;
}