Revision history [back]

mobilenet-ssd is an object detection network, not a classification one.

it also has only 20 object classes (plus a "background" class), not 1000 like googlenet.

you will need code similar to this, to extract bounding boxes / probabilities / labels:

// Labels indexed by the class id the network reports; entry 0 is "background".
const char* classNames[] = {"background",
                            "aeroplane", "bicycle", "bird", "boat",
                            "bottle", "bus", "car", "cat", "chair",
                            "cow", "diningtable", "dog", "horse",
                            "motorbike", "person", "pottedplant",
                            "sheep", "sofa", "train", "tvmonitor"};
const int numClasses = (int)(sizeof(classNames) / sizeof(classNames[0]));

// Run the network and fetch the output blob of the "detection_out" layer.
Mat detection = net.forward("detection_out");
// Wrap the blob's data as a 2-D float matrix (size[2] rows, size[3] columns);
// each row is then read as one detection.
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

const float confidenceThreshold = 0.5f;
for (int i = 0; i < detectionMat.rows; i++)
{
    // Row layout as used below: column 1 = class id, column 2 = confidence,
    // columns 3..6 = box corners, scaled by the frame size to get pixels.
    const float confidence = detectionMat.at<float>(i, 2);
    if (!(confidence > confidenceThreshold))
        continue;   // skip weak detections

    // The class id comes from the network output, so never trust it as an
    // array index: fall back to a placeholder label when it is out of range.
    const int classId = (int)detectionMat.at<float>(i, 1);
    const char* className = (classId >= 0 && classId < numClasses)
                                ? classNames[classId]
                                : "unknown";

    // Scale the box corners to pixel coordinates of the frame.
    const float xLeft   = detectionMat.at<float>(i, 3) * frame.cols;
    const float yTop    = detectionMat.at<float>(i, 4) * frame.rows;
    const float xRight  = detectionMat.at<float>(i, 5) * frame.cols;
    const float yBottom = detectionMat.at<float>(i, 6) * frame.rows;

    // Format the confidence for the label text.
    ostringstream ss;
    ss << confidence;
    String conf(ss.str());

    // Outline the detected object.
    Rect object((int)xLeft, (int)yTop,
                (int)(xRight - xLeft),
                (int)(yBottom - yTop));
    rectangle(frame, object, Scalar(0, 255, 0));

    // Draw "<class>: <confidence>" on a filled white box sitting on the
    // top-left corner of the detection.
    String label = String(className) + ": " + conf;
    int baseLine = 0;
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    rectangle(frame, Rect(Point((int)xLeft, (int)(yTop - labelSize.height)),
                          Size(labelSize.width, labelSize.height + baseLine)),
              Scalar(255, 255, 255), FILLED);   // FILLED: C++ API constant replacing legacy CV_FILLED
    putText(frame, label, Point((int)xLeft, (int)yTop),
            FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
}

// Show the annotated frame and wait for a key press.
imshow("detections", frame);
waitKey();

mobilenet-ssd is an object detection network, not a classification one.

it also has only 20 object classes (plus a "background" class), not 1000 like googlenet.

you will need code similar to this, to extract bounding boxes / probabilities / labels:

(please also see the sample here)

// Labels indexed by the class id the network reports; entry 0 is "background".
const char* classNames[] = {"background",
                            "aeroplane", "bicycle", "bird", "boat",
                            "bottle", "bus", "car", "cat", "chair",
                            "cow", "diningtable", "dog", "horse",
                            "motorbike", "person", "pottedplant",
                            "sheep", "sofa", "train", "tvmonitor"};
const int numClasses = (int)(sizeof(classNames) / sizeof(classNames[0]));

// Run the network and fetch the output blob of the "detection_out" layer.
Mat detection = net.forward("detection_out");
// Wrap the blob's data as a 2-D float matrix (size[2] rows, size[3] columns);
// each row is then read as one detection.
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

const float confidenceThreshold = 0.5f;
for (int i = 0; i < detectionMat.rows; i++)
{
    // Row layout as used below: column 1 = class id, column 2 = confidence,
    // columns 3..6 = box corners, scaled by the frame size to get pixels.
    const float confidence = detectionMat.at<float>(i, 2);
    if (!(confidence > confidenceThreshold))
        continue;   // skip weak detections

    // The class id comes from the network output, so never trust it as an
    // array index: fall back to a placeholder label when it is out of range.
    const int classId = (int)detectionMat.at<float>(i, 1);
    const char* className = (classId >= 0 && classId < numClasses)
                                ? classNames[classId]
                                : "unknown";

    // Scale the box corners to pixel coordinates of the frame.
    const float xLeft   = detectionMat.at<float>(i, 3) * frame.cols;
    const float yTop    = detectionMat.at<float>(i, 4) * frame.rows;
    const float xRight  = detectionMat.at<float>(i, 5) * frame.cols;
    const float yBottom = detectionMat.at<float>(i, 6) * frame.rows;

    // Format the confidence for the label text.
    ostringstream ss;
    ss << confidence;
    String conf(ss.str());

    // Outline the detected object.
    Rect object((int)xLeft, (int)yTop,
                (int)(xRight - xLeft),
                (int)(yBottom - yTop));
    rectangle(frame, object, Scalar(0, 255, 0));

    // Draw "<class>: <confidence>" on a filled white box sitting on the
    // top-left corner of the detection.
    String label = String(className) + ": " + conf;
    int baseLine = 0;
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    rectangle(frame, Rect(Point((int)xLeft, (int)(yTop - labelSize.height)),
                          Size(labelSize.width, labelSize.height + baseLine)),
              Scalar(255, 255, 255), FILLED);   // FILLED: C++ API constant replacing legacy CV_FILLED
    putText(frame, label, Point((int)xLeft, (int)yTop),
            FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
}

// Show the annotated frame and wait for a key press.
imshow("detections", frame);
waitKey();

mobilenet-ssd is an object detection network, not a classification one.

it also has only 20 object classes (plus a "background" class), not 1000 like googlenet.

you will need code similar to this, to extract bounding boxes / probabilities / labels:

(please also see the sample here)

// Labels indexed by the class id the network reports; entry 0 is "background".
const char* classNames[] = {"background",
                            "aeroplane", "bicycle", "bird", "boat",
                            "bottle", "bus", "car", "cat", "chair",
                            "cow", "diningtable", "dog", "horse",
                            "motorbike", "person", "pottedplant",
                            "sheep", "sofa", "train", "tvmonitor"};
const int numClasses = (int)(sizeof(classNames) / sizeof(classNames[0]));

// Run the network and fetch the output blob of the "detection_out" layer.
Mat detection = net.forward("detection_out");
// Wrap the blob's data as a 2-D float matrix (size[2] rows, size[3] columns);
// each row is then read as one detection.
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

const float confidenceThreshold = 0.5f;
for (int i = 0; i < detectionMat.rows; i++)
{
    // Row layout as used below: column 1 = class id, column 2 = confidence,
    // columns 3..6 = box corners, scaled by the frame size to get pixels.
    const float confidence = detectionMat.at<float>(i, 2);
    if (!(confidence > confidenceThreshold))
        continue;   // skip weak detections

    // The class id comes from the network output, so never trust it as an
    // array index: fall back to a placeholder label when it is out of range.
    const int classId = (int)detectionMat.at<float>(i, 1);
    const char* className = (classId >= 0 && classId < numClasses)
                                ? classNames[classId]
                                : "unknown";

    // Scale the box corners to pixel coordinates of the frame.
    const float xLeft   = detectionMat.at<float>(i, 3) * frame.cols;
    const float yTop    = detectionMat.at<float>(i, 4) * frame.rows;
    const float xRight  = detectionMat.at<float>(i, 5) * frame.cols;
    const float yBottom = detectionMat.at<float>(i, 6) * frame.rows;

    // Format the confidence for the label text.
    ostringstream ss;
    ss << confidence;
    String conf(ss.str());

    // Outline the detected object.
    Rect object((int)xLeft, (int)yTop,
                (int)(xRight - xLeft),
                (int)(yBottom - yTop));
    rectangle(frame, object, Scalar(0, 255, 0));

    // Draw "<class>: <confidence>" on a filled white box sitting on the
    // top-left corner of the detection.
    String label = String(className) + ": " + conf;
    int baseLine = 0;
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    rectangle(frame, Rect(Point((int)xLeft, (int)(yTop - labelSize.height)),
                          Size(labelSize.width, labelSize.height + baseLine)),
              Scalar(255, 255, 255), FILLED);   // FILLED: C++ API constant replacing legacy CV_FILLED
    putText(frame, label, Point((int)xLeft, (int)yTop),
            FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
}

// Show the annotated frame and wait for a key press.
imshow("detections", frame);
waitKey();