Revision history [back]

@aguila, Thank you! Some bugs in the proposal layer have been fixed. To make this model work correctly, you need to apply the changes from PR https://github.com/opencv/opencv/pull/11221

As you mentioned, we need to modify the .prototxt in the following way:

# Proposal layer 'm3@ssh_proposal': the original Python layer is replaced by
# the built-in 'Proposal' layer type. The old settings are kept as comments.
layer {
  name: 'm3@ssh_proposal'
  # type: 'Python'
  type: 'Proposal'
  bottom: 'm3@ssh_cls_prob_reshape_output'
  bottom: 'm3@ssh_bbox_pred_output'
  bottom: 'im_info'
  top: 'm3@ssh_boxes'
  top: 'm3@ssh_cls_prob'
  # Original Python-layer configuration, kept for reference only:
  # python_param {
  #   module: 'SSH.layers.proposal_layer'
  #   layer: 'ProposalLayer'
  #   param_str: "{'feat_stride': 32,'scales': [16,32], 'ratios':[1,]}"
  # }
  # Same values as the param_str above; each list entry becomes one repeated
  # 'scale' / 'ratio' field.
  proposal_param {
    feat_stride: 32
    scale: 16
    scale: 32
    ratio: 1.0
    # 1.0 is intended to disable NMS inside this layer; filtering is left to
    # the caller.
    nms_thresh: 1.0
  }
}

...

# Proposal layer 'm2@ssh_proposal': the original Python layer is replaced by
# the built-in 'Proposal' layer type. The old settings are kept as comments.
layer {
  name: 'm2@ssh_proposal'
  # type: 'Python'
  type: 'Proposal'
  bottom: 'm2@ssh_cls_prob_reshape_output'
  bottom: 'm2@ssh_bbox_pred_output'
  bottom: 'im_info'
  top: 'm2@ssh_boxes'
  top: 'm2@ssh_cls_prob'
  # Original Python-layer configuration, kept for reference only:
  # python_param {
  #   module: 'SSH.layers.proposal_layer'
  #   layer: 'ProposalLayer'
  #   param_str: "{'feat_stride': 16,'scales': [4,8], 'ratios':[1,]}"
  # }
  # Same values as the param_str above; each list entry becomes one repeated
  # 'scale' / 'ratio' field.
  proposal_param {
    feat_stride: 16
    scale: 4
    scale: 8
    ratio: 1.0
    # 1.0 is intended to disable NMS inside this layer; filtering is left to
    # the caller.
    nms_thresh: 1.0
 }
}

...

# Proposal layer 'm1@ssh_proposal': the original Python layer is replaced by
# the built-in 'Proposal' layer type. The old settings are kept as comments.
layer {
  name: 'm1@ssh_proposal'
  # type: 'Python'
  type: 'Proposal'
  bottom: 'm1@ssh_cls_prob_reshape_output'
  bottom: 'm1@ssh_bbox_pred_output'
  bottom: 'im_info'
  top: 'm1@ssh_boxes'
  top: 'm1@ssh_cls_prob'
  # Original Python-layer configuration, kept for reference only:
  # python_param {
  #   module: 'SSH.layers.proposal_layer'
  #   layer: 'ProposalLayer'
  #   param_str: "{'feat_stride': 8,'scales': [1,2], 'ratios':[1,]}"
  # }
  # Same values as the param_str above; each list entry becomes one repeated
  # 'scale' / 'ratio' field.
  proposal_param {
    feat_stride: 8
    scale: 1
    scale: 2
    ratio: 1.0
    # 1.0 is intended to disable NMS inside this layer; filtering is left to
    # the caller.
    nms_thresh: 1.0
  }
}

Most of the arguments map directly onto the original ones; nms_thresh: 1.0 is added to disable NMS post-processing, as the model's authors did (see https://github.com/mahyarnajibi/SSH/blob/5f41b5c569a3e60720651833b0f804db96f4f20d/SSH/layers/proposal_layer.py#L157)

NOTE: without non-maximum suppression these layers simply output all the predicted bounding boxes (no more than 300 by default). The boxes are sorted by confidence, and almost all of them have a very small confidence, close to zero. However, if you know that your image contains more than 300 faces (which is highly unlikely), add the extra parameter post_nms_topn, which equals 300 by default. For example, post_nms_topn: 1000.

To get the predicted bounding boxes and scores, run the model with OpenCV:

import cv2 as cv
import numpy as np

# Extra 'im_info' network input consumed by the Proposal layers.
# NOTE(review): presumably (height, width, scale) as in Faster R-CNN - confirm.
imInfo = np.array([224, 224, 1.6], dtype=np.float32)

# Thresholds passed to cv.dnn.NMSBoxes during post-processing: minimum score
# to keep a box, and the overlap threshold used for suppression.
CONF_THRESH = 0.5
NMS_THRESH = 0.45

net = cv.dnn.readNet('test_ssh.prototxt', 'SSH.caffemodel')

img = cv.imread('/path/to/image')
if img is None:
    # cv.imread signals failure by returning None instead of raising.
    raise IOError("Could not read image '/path/to/image'")

# Keep the original size so boxes predicted on the resized input can be mapped
# back onto the image.
imgHeight = img.shape[0]
imgWidth = img.shape[1]

# Resize to 224x224 and subtract the per-channel mean; no value scaling, no
# R/B channel swap and no cropping.
inp = cv.dnn.blobFromImage(img, 1.0, (224, 224), (102.9801, 115.9465, 122.7717),
                           swapRB=False, crop=False)

net.setInput(inp)
net.setInput(imInfo, 'im_info')

# outs[0] holds the scores and outs[1] the boxes, in the order requested here.
outs = net.forward(['ssh_cls_prob', 'ssh_boxes'])

Then you need to apply NMS to filter predicted faces:

# Flatten the scores so that scores[i] belongs to the i-th row of outs[1].
scores = outs[0].flatten().tolist()
boxes = []
for box in outs[1]:
    # box[0] is not used. NOTE(review): presumably the batch index - confirm.
    # Coordinates refer to the 224x224 network input; rescale to image size.
    left = int(box[1] / 224 * imgWidth)
    top = int(box[2] / 224 * imgHeight)
    right = int(box[3] / 224 * imgWidth)
    bottom = int(box[4] / 224 * imgHeight)
    # cv.dnn.NMSBoxes expects [x, y, width, height].
    boxes.append([left, top, right - left + 1, bottom - top + 1])

ids = cv.dnn.NMSBoxes(boxes, scores, CONF_THRESH, NMS_THRESH)

# Depending on the OpenCV version, NMSBoxes returns the kept indices as an
# Nx1 array, a flat array, or an empty tuple. Flattening handles all three.
for idx in np.array(ids).flatten():
    left, top, width, height = boxes[idx]
    right = left + width - 1
    bottom = top + height - 1
    cv.rectangle(img, (left, top), (right, bottom), (0, 255, 0))

cv.imshow('ssh', img)
cv.waitKey()

@aguila, Thank you! Some bugs in the proposal layer have been fixed. To make this model work correctly, you need to apply the changes from PR https://github.com/opencv/opencv/pull/11221

As you mentioned, we need to modify the .prototxt in the following way:

# Proposal layer 'm3@ssh_proposal': the original Python layer is replaced by
# the built-in 'Proposal' layer type. The old settings are kept as comments.
layer {
  name: 'm3@ssh_proposal'
  # type: 'Python'
  type: 'Proposal'
  bottom: 'm3@ssh_cls_prob_reshape_output'
  bottom: 'm3@ssh_bbox_pred_output'
  bottom: 'im_info'
  top: 'm3@ssh_boxes'
  top: 'm3@ssh_cls_prob'
  # Original Python-layer configuration, kept for reference only:
  # python_param {
  #   module: 'SSH.layers.proposal_layer'
  #   layer: 'ProposalLayer'
  #   param_str: "{'feat_stride': 32,'scales': [16,32], 'ratios':[1,]}"
  # }
  # Same values as the param_str above; each list entry becomes one repeated
  # 'scale' / 'ratio' field.
  proposal_param {
    feat_stride: 32
    scale: 16
    scale: 32
    ratio: 1.0
    # 1.0 is intended to disable NMS inside this layer; filtering is left to
    # the caller.
    nms_thresh: 1.0
  }
}

...

# Proposal layer 'm2@ssh_proposal': the original Python layer is replaced by
# the built-in 'Proposal' layer type. The old settings are kept as comments.
layer {
  name: 'm2@ssh_proposal'
  # type: 'Python'
  type: 'Proposal'
  bottom: 'm2@ssh_cls_prob_reshape_output'
  bottom: 'm2@ssh_bbox_pred_output'
  bottom: 'im_info'
  top: 'm2@ssh_boxes'
  top: 'm2@ssh_cls_prob'
  # Original Python-layer configuration, kept for reference only:
  # python_param {
  #   module: 'SSH.layers.proposal_layer'
  #   layer: 'ProposalLayer'
  #   param_str: "{'feat_stride': 16,'scales': [4,8], 'ratios':[1,]}"
  # }
  # Same values as the param_str above; each list entry becomes one repeated
  # 'scale' / 'ratio' field.
  proposal_param {
    feat_stride: 16
    scale: 4
    scale: 8
    ratio: 1.0
    # 1.0 is intended to disable NMS inside this layer; filtering is left to
    # the caller.
    nms_thresh: 1.0
 }
}

...

# Proposal layer 'm1@ssh_proposal': the original Python layer is replaced by
# the built-in 'Proposal' layer type. The old settings are kept as comments.
layer {
  name: 'm1@ssh_proposal'
  # type: 'Python'
  type: 'Proposal'
  bottom: 'm1@ssh_cls_prob_reshape_output'
  bottom: 'm1@ssh_bbox_pred_output'
  bottom: 'im_info'
  top: 'm1@ssh_boxes'
  top: 'm1@ssh_cls_prob'
  # Original Python-layer configuration, kept for reference only:
  # python_param {
  #   module: 'SSH.layers.proposal_layer'
  #   layer: 'ProposalLayer'
  #   param_str: "{'feat_stride': 8,'scales': [1,2], 'ratios':[1,]}"
  # }
  # Same values as the param_str above; each list entry becomes one repeated
  # 'scale' / 'ratio' field.
  proposal_param {
    feat_stride: 8
    scale: 1
    scale: 2
    ratio: 1.0
    # 1.0 is intended to disable NMS inside this layer; filtering is left to
    # the caller.
    nms_thresh: 1.0
  }
}

To get the predicted bounding boxes and scores, run the model with OpenCV:

import cv2 as cv
import numpy as np

# Extra 'im_info' network input consumed by the Proposal layers.
# NOTE(review): presumably (height, width, scale) as in Faster R-CNN - confirm.
imInfo = np.array([224, 224, 1.6], dtype=np.float32)

# Thresholds passed to cv.dnn.NMSBoxes during post-processing: minimum score
# to keep a box, and the overlap threshold used for suppression.
CONF_THRESH = 0.5
NMS_THRESH = 0.45

net = cv.dnn.readNet('test_ssh.prototxt', 'SSH.caffemodel')

img = cv.imread('/path/to/image')
if img is None:
    # cv.imread signals failure by returning None instead of raising.
    raise IOError("Could not read image '/path/to/image'")

# Keep the original size so boxes predicted on the resized input can be mapped
# back onto the image.
imgHeight = img.shape[0]
imgWidth = img.shape[1]

# Resize to 224x224 and subtract the per-channel mean; no value scaling, no
# R/B channel swap and no cropping.
inp = cv.dnn.blobFromImage(img, 1.0, (224, 224), (102.9801, 115.9465, 122.7717),
                           swapRB=False, crop=False)

net.setInput(inp)
net.setInput(imInfo, 'im_info')

# outs[0] holds the scores and outs[1] the boxes, in the order requested here.
outs = net.forward(['ssh_cls_prob', 'ssh_boxes'])

Then you need to apply NMS to filter predicted faces:

# Flatten the scores so that scores[i] belongs to the i-th row of outs[1].
scores = outs[0].flatten().tolist()
boxes = []
for box in outs[1]:
    # box[0] is not used. NOTE(review): presumably the batch index - confirm.
    # Coordinates refer to the 224x224 network input; rescale to image size.
    left = int(box[1] / 224 * imgWidth)
    top = int(box[2] / 224 * imgHeight)
    right = int(box[3] / 224 * imgWidth)
    bottom = int(box[4] / 224 * imgHeight)
    # cv.dnn.NMSBoxes expects [x, y, width, height].
    boxes.append([left, top, right - left + 1, bottom - top + 1])

ids = cv.dnn.NMSBoxes(boxes, scores, CONF_THRESH, NMS_THRESH)

# Depending on the OpenCV version, NMSBoxes returns the kept indices as an
# Nx1 array, a flat array, or an empty tuple. Flattening handles all three.
for idx in np.array(ids).flatten():
    left, top, width, height = boxes[idx]
    right = left + width - 1
    bottom = top + height - 1
    cv.rectangle(img, (left, top), (right, bottom), (0, 255, 0))

cv.imshow('ssh', img)
cv.waitKey()