Hi
We are developing a project based on the Intel NCS2, OpenVINO, and OpenCV. I want to run YOLOv3 models in OpenCV with NCS2 support for object detection.
I've converted the YOLOv3 models to IR models using the following commands:
python3 convert_weights_pb.py --class_names coco.names --data_format NHWC --weights_file yolov3.weights
and
python3 convert_weights_pb.py --class_names coco.names --data_format NHWC --weights_file yolov3-tiny.weights --tiny
python3 /mo_tf.py --input_model frozen_darknet_yolov3_model.pb --tensorflow_use_custom_operations_config /extensions/front/tf/yolo_v3_tiny.json --input_shape=[1,416,416,3] --data_type FP32  # for CPU
python3 /mo_tf.py --input_model frozen_darknet_yolov3_model.pb --tensorflow_use_custom_operations_config /extensions/front/tf/yolo_v3_tiny.json --input_shape=[1,416,416,3] --data_type FP16  # for MYRIAD
and
python3 /mo_tf.py --input_model frozen_darknet_yolov3_model.pb --tensorflow_use_custom_operations_config /extensions/front/tf/yolo_v3.json --input_shape=[1,416,416,3] --data_type FP32  # for CPU
python3 /mo_tf.py --input_model frozen_darknet_yolov3_model.pb --tensorflow_use_custom_operations_config /extensions/front/tf/yolo_v3.json --input_shape=[1,416,416,3] --data_type FP16  # for MYRIAD
Here are my JSON files. For yolov3-tiny:
{
    "id": "TFYOLOV3",
    "match_kind": "general",
    "custom_attributes": {
        "classes": 80,
        "anchors": [10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319],
        "coords": 4,
        "num": 6,
        "mask": [0, 1, 2],
        "jitter": 0.3,
        "ignore_thresh": 0.7,
        "truth_thresh": 1,
        "random": 1,
        "entry_points": ["detector/yolo-v3-tiny/Reshape", "detector/yolo-v3-tiny/Reshape_4"]
    }
}
For yolov3:
{
    "id": "TFYOLOV3",
    "match_kind": "general",
    "custom_attributes": {
        "classes": 80,
        "coords": 4,
        "num": 9,
        "mask": [0, 1, 2],
        "entry_points": ["detector/yolo-v3/Reshape", "detector/yolo-v3/Reshape_4", "detector/yolo-v3/Reshape_8"]
    }
}
The conversion completed successfully, producing frozen_darknet_yolov3_model.xml and frozen_darknet_yolov3_model.bin.
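As a quick sanity check, the IR pair can be loaded on its own and its output layers listed. A minimal sketch (the file names are Model Optimizer's defaults, and getUnconnectedOutLayersNames needs a reasonably recent OpenCV):

#include <opencv2/dnn.hpp>
#include <iostream>

int main()
{
    // Load the IR pair produced by Model Optimizer (default output names).
    cv::dnn::Net net = cv::dnn::readNet("frozen_darknet_yolov3_model.xml",
                                        "frozen_darknet_yolov3_model.bin");
    // YOLOv3 should expose one output layer per detection scale.
    for (const std::string& name : net.getUnconnectedOutLayersNames())
        std::cout << "output layer: " << name << std::endl;
    return 0;
}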
I'm trying to use the converted model in OpenCV (cfg.modelConfiguration and cfg.modelWeights point at the IR .xml and .bin files). Here is my code:
...
net = readNet(cfg.modelConfiguration, cfg.modelWeights);
net.setPreferableBackend(cfg.backend); // e.g. DNN_BACKEND_INFERENCE_ENGINE
net.setPreferableTarget(cfg.target);   // e.g. DNN_TARGET_CPU or DNN_TARGET_MYRIAD
...
// recognize
blob = blobFromImage(frame, cfg.scale, Size(cfg.inpWidth, cfg.inpHeight), Scalar(), false);
net.setInput(blob);
vector<Mat> outs;
names = getOutputsNames(net);
net.forward(outs, names);
for (size_t i = 0; i < outs.size(); ++i)
{
    Mat prob = outs[i]; // 4-dimensional Mat
    // get the coordinates
}
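To see what I'm actually getting back, I can print the shape of each output blob (a small sketch using Mat::dims and Mat::size):

for (size_t i = 0; i < outs.size(); ++i)
{
    const Mat& out = outs[i];
    cout << "output " << i << ": dims=" << out.dims << ", shape=[";
    for (int d = 0; d < out.dims; ++d)
        cout << out.size[d] << (d + 1 < out.dims ? ", " : "");
    cout << "]" << endl;
}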
The code runs, but I don't know how to extract the bounding-box coordinates from these outputs.
I tried this:
Mat prob = outs[i];
Mat detectionMat(prob.size[2], prob.size[3], CV_32F, prob.ptr<float>());
float* data = (float*)detectionMat.data;
for (int j = 0; j < detectionMat.rows; ++j, data += detectionMat.cols)
{
    Mat scores = detectionMat.row(j).colRange(5, detectionMat.cols);
    Point classIdPoint;
    double confidence;
    // Get the value and location of the maximum score
    minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
    if (confidence > cfg.confThreshold)
    {
        ObjResult result;
        cout << "confidence " << confidence << endl;
        cout << "classIdPoint.x " << classIdPoint.x << endl;
        // cout << "classes[classId] " << classes[classIdPoint.x] << endl;
        result.centerX = (int)(data[0] * frame.cols);
        result.centerY = (int)(data[1] * frame.rows);
        result.width = (int)(data[2] * frame.cols);
        result.height = (int)(data[3] * frame.rows);
        result.confidence = confidence;
        result.classId = classIdPoint.x;
        result.className = classes[classIdPoint.x];
        ret.objResults.push_back(result);
    }
}
or:
Mat prob = outs[i];
Mat detectionMat(prob.size[2], prob.size[3], CV_32F, prob.ptr<float>());
for (int c = 0; c < numlabels; c++)
{
    int labelnum = (size_t)(detectionMat.at<float>(c, 1)) - 1;
    labelnum = (labelnum < 0) ? 0 : (labelnum > numlabels) ? numlabels : labelnum;
    cout << "labelnum: " << labelnum << endl;
    float confidence = detectionMat.at<float>(c, 2);
    cout << "confidence: " << confidence << endl;
    if (confidence > 0.5)
    {
        int classId = static_cast<int>(detectionMat.at<float>(c, 1));
        int left = static_cast<int>(detectionMat.at<float>(c, 3) * frame.cols);
        int top = static_cast<int>(detectionMat.at<float>(c, 4) * frame.rows);
        int right = static_cast<int>(detectionMat.at<float>(c, 5) * frame.cols);
        int bottom = static_cast<int>(detectionMat.at<float>(c, 6) * frame.rows);
        ObjResult result;
        result.width = right - left;
        result.height = top - bottom;
        result.centerX = left + result.width / 2;
        result.centerY = bottom + result.height / 2;
        result.confidence = confidence;
        result.classId = classId;
        result.className = classes[classId];
        ret.objResults.push_back(result);
    }
}
The first version follows the parsing pattern from OpenCV's YOLO sample (each row being [center_x, center_y, width, height, objectness, class scores...]), and the second treats the blob like an SSD-style DetectionOutput, but neither seems to be the proper way for these 4-dimensional outputs.
How can I get the bounding-box coordinates from the converted model's outputs? Do you have any sample code?
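In case it matters for the answer: once the boxes are decoded, I plan to filter overlapping candidates with cv::dnn::NMSBoxes, the way OpenCV's object_detection.cpp sample does. A rough sketch over my ObjResult list (the 0.4f NMS threshold is just a placeholder value):

std::vector<cv::Rect> boxes;      // one Rect per candidate detection
std::vector<float> confidences;   // matching score per candidate
for (const ObjResult& r : ret.objResults)
{
    boxes.emplace_back(r.centerX - r.width / 2, r.centerY - r.height / 2,
                       r.width, r.height);
    confidences.push_back(r.confidence);
}
std::vector<int> indices;
cv::dnn::NMSBoxes(boxes, confidences, cfg.confThreshold, 0.4f, indices);
// indices now holds the detections to keep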