Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

Yolov3 and darknet problem

I developed a custom object detector using Tiny YOLO and Darknet. It works great, but I need one specific feature:
Each bounding box output by the network is represented by a vector of (number of classes + 5) elements. The first 4 elements represent center_x, center_y, width and height. The fifth element represents the confidence that the bounding box encloses an object. The rest of the elements are the confidences associated with each class (i.e. object type). For each box, I need the confidence associated with every class, but the output contains only the maximum confidence; the other confidences are 0.

Example run :

print(scores)

returned

[0.        0.        0.5874982]

0.5874982 is the max confidence; it belongs to the 3rd class. But I don't understand why the other confidences are 0. Thanks for any reply, and I'm sorry for my bad English. This is the code:

 import cv2 as cv
 import argparse
 import sys
 import numpy as np
 import os.path

 # Detection thresholds and network input size.
 confThreshold = 0.5     # Minimum class score for a detection to be kept
 nmsThreshold = 0.6      # Threshold handed to non-maximum suppression
 inpWidth = 416          # Width of network's input image
 inpHeight = 416         # Height of network's input image

 # Command-line options: an image path, a video path, or neither.
 parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
 parser.add_argument('--image', help='Path to image file.')
 parser.add_argument('--video', help='Path to video file.')
 args = parser.parse_args()

 # Load names of classes, one name per line of the file.
 classesFile = "obj.names"
 with open(classesFile, 'rt') as f:
     classes = f.read().rstrip('\n').split('\n')

 # Give the configuration and weight files for the model and load the network using them.
 modelConfiguration = "yolov3-tiny-obj.cfg"
 modelWeights = "pesi/pesi_3_classi_new/yolov3-tiny-obj_7050.weights"

 net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
 net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
 net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

 # Get the names of the output layers
 def getOutputsNames(net):
     """Return the names of the network's unconnected output layers.

     ``net.getUnconnectedOutLayers()`` yields 1-based layer indices. Depending
     on the OpenCV version they arrive either nested, shape (N, 1), or flat,
     shape (N,). The result is flattened first so both forms work; indexing
     every entry with ``[0]`` fails on the flat form.

     Args:
         net: object exposing ``getLayerNames()`` and
             ``getUnconnectedOutLayers()``.

     Returns:
         List of layer names, in the order the indices were reported.
     """
     layersNames = net.getLayerNames()
     outLayers = np.asarray(net.getUnconnectedOutLayers()).flatten()
     # Indices are 1-based, the names sequence is 0-based.
     return [layersNames[int(i) - 1] for i in outLayers]

 # Draw the predicted bounding box
 def drawPred(classId, conf, left, top, right, bottom):
     """Draw one detection on the module-level ``frame``.

     A rectangle is drawn for class ids 0, 1 and 2 (ids 0 and 2 share a
     colour); any other id gets no rectangle. A caption with the confidence,
     prefixed by the class name when ``classes`` is non-empty, is then
     rendered on a white background at the box's top-left corner.
     """
     # Pick the box colour; None means "do not draw a box".
     if classId == 1:
         boxColor = (3, 14, 186)
     elif classId in (0, 2):
         boxColor = (40, 198, 31)
     else:
         boxColor = None
     if boxColor is not None:
         cv.rectangle(frame, (left, top), (right, bottom), boxColor, 3)

     # Build the caption: "<class name>:<confidence>" or just the confidence.
     caption = '%.2f' % conf
     if classes:
         assert(classId < len(classes))
         caption = '%s:%s' % (classes[classId], caption)

     # Measure the caption and keep it inside the image at the top edge.
     (textWidth, textHeight), baseLine = cv.getTextSize(caption, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
     top = max(top, textHeight)
     backgroundFrom = (left, top - round(textHeight))
     backgroundTo = (left + round(textWidth), top + baseLine)
     cv.rectangle(frame, backgroundFrom, backgroundTo, (255, 255, 255), cv.FILLED)
     cv.putText(frame, caption, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.45, (0,0,0), 1)

  # Remove the bounding boxes with low confidence using non-maxima suppression
  def postprocess(frame, outs):
      """Filter raw detections, run non-maximum suppression and draw the result.

      Each detection row is read as ``[center_x, center_y, width, height, _,
      score_0, score_1, ...]``; the first four values are scaled by the frame
      size. A detection is kept when its best class score exceeds
      ``confThreshold``. The kept boxes go through ``cv.dnn.NMSBoxes`` and
      the survivors are handed to ``drawPred``.

      Args:
          frame: image array; only ``frame.shape[0]`` (height) and
              ``frame.shape[1]`` (width) are read here.
          outs: iterable of per-output-layer detection arrays.
      """
      frameHeight = frame.shape[0]
      frameWidth = frame.shape[1]

      # Scan through all the bounding boxes output from the network and keep
      # only the ones with high confidence scores. Assign the box's class
      # label as the class with the highest score.
      classIds = []
      confidences = []
      boxes = []
      for out in outs:
          for detection in out:
              scores = detection[5:]
              classId = np.argmax(scores)
              confidence = scores[classId]
              if confidence > confThreshold:
                  # Debug output: all per-class scores of the kept detection.
                  print(scores)
                  center_x = int(detection[0] * frameWidth)
                  center_y = int(detection[1] * frameHeight)
                  width = int(detection[2] * frameWidth)
                  height = int(detection[3] * frameHeight)
                  left = int(center_x - width / 2)
                  top = int(center_y - height / 2)
                  classIds.append(classId)
                  confidences.append(float(confidence))
                  boxes.append([left, top, width, height])

      # Perform non maximum suppression to eliminate redundant overlapping
      # boxes with lower confidences.
      indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
      # NMSBoxes may return nested (N, 1) indices, flat (N,) indices, or an
      # empty tuple; flattening handles all three, whereas ``i[0]`` breaks on
      # the flat form.
      for i in np.asarray(indices).flatten():
          i = int(i)
          left, top, width, height = boxes[i]
          drawPred(classIds[i], confidences[i], left, top, left + width, top + height)

# Process inputs
# Window used to display every processed frame.
winName = 'Deep learning object detection in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

# Select the input (image, video file, or camera 0) and the output file name.
outputFile = "yolo_out_py.avi"
if args.image:
    # Open the image file
    if not os.path.isfile(args.image):
        print("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.image)
    # splitext drops the extension whatever its length (".jpg", ".jpeg", ...).
    outputFile = os.path.splitext(args.image)[0] + '_yolo_out_py.jpg'
elif args.video:
    # Open the video file
    if not os.path.isfile(args.video):
        print("Input video file ", args.video, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.video)
    outputFile = os.path.splitext(args.video)[0] + '_yolo_out_py.avi'
else:
    # No file given: read from capture device 0.
    cap = cv.VideoCapture(0)

# A video writer is only needed when the input is not a single image.
if not args.image:
    vid_writer = cv.VideoWriter(
        outputFile,
        cv.VideoWriter_fourcc('M', 'J', 'P', 'G'),
        5,
        (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)), round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))),
    )

# Process frames until a key is pressed or the input is exhausted.
while cv.waitKey(1) < 0:

    hasFrame, frame = cap.read()

    if not hasFrame:
        print("Done processing !!!")
        print("Output file is stored as ", outputFile)
        cv.waitKey(3000)
        break

    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)

    # Sets the input to the network
    net.setInput(blob)

    # Runs the forward pass to get output of the output layers
    outs = net.forward(getOutputsNames(net))

    # Remove the bounding boxes with low confidence
    postprocess(frame, outs)

    # Put efficiency information. The function getPerfProfile returns the
    # overall time for inference (t) and the timings for each of the layers
    # (in layersTimes).
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
    cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

    # Write the annotated frame: a single image file or the next video frame.
    if args.image:
        cv.imwrite(outputFile, frame.astype(np.uint8))
    else:
        vid_writer.write(frame.astype(np.uint8))

    cv.imshow(winName, frame)

# Release the capture device and, if one was opened, the video writer, so the
# output file is finalized on both exit paths (end of input or key press).
cap.release()
if not args.image:
    vid_writer.release()

Yolov3 and darknet problem

I developed a custom object detector using Tiny YOLO and Darknet. It works great, but I need one specific feature:
Each bounding box output by the network is represented by a vector of (number of classes + 5) elements. The first 4 elements represent center_x, center_y, width and height. The fifth element represents the confidence that the bounding box encloses an object. The rest of the elements are the confidences associated with each class (i.e. object type). For each box, I need the confidence associated with every class, but the output contains only the maximum confidence; the other confidences are 0.

Example run :

print(scores)

returned

[0.        0.        0.5874982]

0.5874982 is the max confidence; it belongs to the 3rd class. But I don't understand why the other confidences are 0. Thanks for any reply, and I'm sorry for my bad English. This is the code:

 import cv2 as cv
 import argparse
 import sys
 import numpy as np
 import os.path

 # Detection thresholds and network input size.
 confThreshold = 0.5     # Minimum class score for a detection to be kept
 nmsThreshold = 0.6      # Threshold handed to non-maximum suppression
 inpWidth = 416          # Width of network's input image
 inpHeight = 416         # Height of network's input image

 # Command-line options: an image path, a video path, or neither.
 parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
 parser.add_argument('--image', help='Path to image file.')
 parser.add_argument('--video', help='Path to video file.')
 args = parser.parse_args()

 # Load names of classes, one name per line of the file.
 classesFile = "obj.names"
 with open(classesFile, 'rt') as f:
     classes = f.read().rstrip('\n').split('\n')

 # Give the configuration and weight files for the model and load the network using them.
 modelConfiguration = "yolov3-tiny-obj.cfg"
 modelWeights = "pesi/pesi_3_classi_new/yolov3-tiny-obj_7050.weights"

 net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
 net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
 net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

 # Get the names of the output layers
 def getOutputsNames(net):
     """Return the names of the network's unconnected output layers.

     ``net.getUnconnectedOutLayers()`` yields 1-based layer indices. Depending
     on the OpenCV version they arrive either nested, shape (N, 1), or flat,
     shape (N,). The result is flattened first so both forms work; indexing
     every entry with ``[0]`` fails on the flat form.

     Args:
         net: object exposing ``getLayerNames()`` and
             ``getUnconnectedOutLayers()``.

     Returns:
         List of layer names, in the order the indices were reported.
     """
     layersNames = net.getLayerNames()
     outLayers = np.asarray(net.getUnconnectedOutLayers()).flatten()
     # Indices are 1-based, the names sequence is 0-based.
     return [layersNames[int(i) - 1] for i in outLayers]

 # Draw the predicted bounding box
 def drawPred(classId, conf, left, top, right, bottom):
     """Draw one detection on the module-level ``frame``.

     A rectangle is drawn for class ids 0, 1 and 2 (ids 0 and 2 share a
     colour); any other id gets no rectangle. A caption with the confidence,
     prefixed by the class name when ``classes`` is non-empty, is then
     rendered on a white background at the box's top-left corner.
     """
     # Pick the box colour; None means "do not draw a box".
     if classId == 1:
         boxColor = (3, 14, 186)
     elif classId in (0, 2):
         boxColor = (40, 198, 31)
     else:
         boxColor = None
     if boxColor is not None:
         cv.rectangle(frame, (left, top), (right, bottom), boxColor, 3)

     # Build the caption: "<class name>:<confidence>" or just the confidence.
     caption = '%.2f' % conf
     if classes:
         assert(classId < len(classes))
         caption = '%s:%s' % (classes[classId], caption)

     # Measure the caption and keep it inside the image at the top edge.
     (textWidth, textHeight), baseLine = cv.getTextSize(caption, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
     top = max(top, textHeight)
     backgroundFrom = (left, top - round(textHeight))
     backgroundTo = (left + round(textWidth), top + baseLine)
     cv.rectangle(frame, backgroundFrom, backgroundTo, (255, 255, 255), cv.FILLED)
     cv.putText(frame, caption, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.45, (0,0,0), 1)

  # Remove the bounding boxes with low confidence using non-maxima suppression
  def postprocess(frame, outs):
      """Filter raw detections, run non-maximum suppression and draw the result.

      Each detection row is read as ``[center_x, center_y, width, height, _,
      score_0, score_1, ...]``; the first four values are scaled by the frame
      size. A detection is kept when its best class score exceeds
      ``confThreshold``. The kept boxes go through ``cv.dnn.NMSBoxes`` and
      the survivors are handed to ``drawPred``.

      Args:
          frame: image array; only ``frame.shape[0]`` (height) and
              ``frame.shape[1]`` (width) are read here.
          outs: iterable of per-output-layer detection arrays.
      """
      frameHeight = frame.shape[0]
      frameWidth = frame.shape[1]

      # Scan through all the bounding boxes output from the network and keep
      # only the ones with high confidence scores. Assign the box's class
      # label as the class with the highest score.
      classIds = []
      confidences = []
      boxes = []
      for out in outs:
          for detection in out:
              scores = detection[5:]
              classId = np.argmax(scores)
              confidence = scores[classId]
              if confidence > confThreshold:
                  # Debug output: all per-class scores of the kept detection.
                  print(scores)
                  center_x = int(detection[0] * frameWidth)
                  center_y = int(detection[1] * frameHeight)
                  width = int(detection[2] * frameWidth)
                  height = int(detection[3] * frameHeight)
                  left = int(center_x - width / 2)
                  top = int(center_y - height / 2)
                  classIds.append(classId)
                  confidences.append(float(confidence))
                  boxes.append([left, top, width, height])

      # Perform non maximum suppression to eliminate redundant overlapping
      # boxes with lower confidences.
      indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
      # NMSBoxes may return nested (N, 1) indices, flat (N,) indices, or an
      # empty tuple; flattening handles all three, whereas ``i[0]`` breaks on
      # the flat form.
      for i in np.asarray(indices).flatten():
          i = int(i)
          left, top, width, height = boxes[i]
          drawPred(classIds[i], confidences[i], left, top, left + width, top + height)

# Process inputs
# Window used to display every processed frame.
winName = 'Deep learning object detection in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

# Select the input (image, video file, or camera 0) and the output file name.
outputFile = "yolo_out_py.avi"
if args.image:
    # Open the image file
    if not os.path.isfile(args.image):
        print("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.image)
    # splitext drops the extension whatever its length (".jpg", ".jpeg", ...).
    outputFile = os.path.splitext(args.image)[0] + '_yolo_out_py.jpg'
elif args.video:
    # Open the video file
    if not os.path.isfile(args.video):
        print("Input video file ", args.video, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.video)
    outputFile = os.path.splitext(args.video)[0] + '_yolo_out_py.avi'
else:
    # No file given: read from capture device 0.
    cap = cv.VideoCapture(0)

# A video writer is only needed when the input is not a single image.
if not args.image:
    vid_writer = cv.VideoWriter(
        outputFile,
        cv.VideoWriter_fourcc('M', 'J', 'P', 'G'),
        5,
        (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)), round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))),
    )

# Process frames until a key is pressed or the input is exhausted.
while cv.waitKey(1) < 0:

    hasFrame, frame = cap.read()

    if not hasFrame:
        print("Done processing !!!")
        print("Output file is stored as ", outputFile)
        cv.waitKey(3000)
        break

    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)

    # Sets the input to the network
    net.setInput(blob)

    # Runs the forward pass to get output of the output layers
    outs = net.forward(getOutputsNames(net))

    # Remove the bounding boxes with low confidence
    postprocess(frame, outs)

    # Put efficiency information. The function getPerfProfile returns the
    # overall time for inference (t) and the timings for each of the layers
    # (in layersTimes).
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
    cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

    # Write the annotated frame: a single image file or the next video frame.
    if args.image:
        cv.imwrite(outputFile, frame.astype(np.uint8))
    else:
        vid_writer.write(frame.astype(np.uint8))

    cv.imshow(winName, frame)

# Release the capture device and, if one was opened, the video writer, so the
# output file is finalized on both exit paths (end of input or key press).
cap.release()
if not args.image:
    vid_writer.release()

Yolov3 and darknet problem

I developed a custom object detector using Tiny YOLO and Darknet. It works great, but I need one specific feature:
Each bounding box output by the network is represented by a vector of (number of classes + 5) elements. The first 4 elements represent center_x, center_y, width and height. The fifth element represents the confidence that the bounding box encloses an object. The rest of the elements are the confidences associated with each class (i.e. object type). For each box, I need the confidence associated with every class, but the output contains only the maximum confidence; the other confidences are 0.

Example run :

print(scores)

returned

[0.        0.        0.5874982]

0.5874982 is the max confidence; it belongs to the 3rd class. But I don't understand why the other confidences are 0. Thanks for any reply, and I'm sorry for my bad English. This is the code:

 import cv2 as cv
 import argparse
 import sys
 import numpy as np
 import os.path

 # Detection thresholds and network input size.
 confThreshold = 0.5     # Minimum class score for a detection to be kept
 nmsThreshold = 0.6      # Threshold handed to non-maximum suppression
 inpWidth = 416          # Width of network's input image
 inpHeight = 416         # Height of network's input image

 # Command-line options: an image path, a video path, or neither.
 parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
 parser.add_argument('--image', help='Path to image file.')
 parser.add_argument('--video', help='Path to video file.')
 args = parser.parse_args()

 # Load names of classes, one name per line of the file.
 classesFile = "obj.names"
 with open(classesFile, 'rt') as f:
     classes = f.read().rstrip('\n').split('\n')

 # Give the configuration and weight files for the model and load the network using them.
 modelConfiguration = "yolov3-tiny-obj.cfg"
 modelWeights = "pesi/pesi_3_classi_new/yolov3-tiny-obj_7050.weights"

 net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
 net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
 net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

 # Get the names of the output layers
 def getOutputsNames(net):
     """Return the names of the network's unconnected output layers.

     ``net.getUnconnectedOutLayers()`` yields 1-based layer indices. Depending
     on the OpenCV version they arrive either nested, shape (N, 1), or flat,
     shape (N,). The result is flattened first so both forms work; indexing
     every entry with ``[0]`` fails on the flat form.

     Args:
         net: object exposing ``getLayerNames()`` and
             ``getUnconnectedOutLayers()``.

     Returns:
         List of layer names, in the order the indices were reported.
     """
     layersNames = net.getLayerNames()
     outLayers = np.asarray(net.getUnconnectedOutLayers()).flatten()
     # Indices are 1-based, the names sequence is 0-based.
     return [layersNames[int(i) - 1] for i in outLayers]

 # Draw the predicted bounding box
 def drawPred(classId, conf, left, top, right, bottom):
     """Draw one detection on the module-level ``frame``.

     A rectangle is drawn for class ids 0, 1 and 2 (ids 0 and 2 share a
     colour); any other id gets no rectangle. A caption with the confidence,
     prefixed by the class name when ``classes`` is non-empty, is then
     rendered on a white background at the box's top-left corner.
     """
     # Pick the box colour; None means "do not draw a box".
     if classId == 1:
         boxColor = (3, 14, 186)
     elif classId in (0, 2):
         boxColor = (40, 198, 31)
     else:
         boxColor = None
     if boxColor is not None:
         cv.rectangle(frame, (left, top), (right, bottom), boxColor, 3)

     # Build the caption: "<class name>:<confidence>" or just the confidence.
     caption = '%.2f' % conf
     if classes:
         assert(classId < len(classes))
         caption = '%s:%s' % (classes[classId], caption)

     # Measure the caption and keep it inside the image at the top edge.
     (textWidth, textHeight), baseLine = cv.getTextSize(caption, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
     top = max(top, textHeight)
     backgroundFrom = (left, top - round(textHeight))
     backgroundTo = (left + round(textWidth), top + baseLine)
     cv.rectangle(frame, backgroundFrom, backgroundTo, (255, 255, 255), cv.FILLED)
     cv.putText(frame, caption, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.45, (0,0,0), 1)

  # Remove the bounding boxes with low confidence using non-maxima suppression
  def postprocess(frame, outs):
      """Filter raw detections, run non-maximum suppression and draw the result.

      Each detection row is read as ``[center_x, center_y, width, height, _,
      score_0, score_1, ...]``; the first four values are scaled by the frame
      size. A detection is kept when its best class score exceeds
      ``confThreshold``. The kept boxes go through ``cv.dnn.NMSBoxes`` and
      the survivors are handed to ``drawPred``.

      Args:
          frame: image array; only ``frame.shape[0]`` (height) and
              ``frame.shape[1]`` (width) are read here.
          outs: iterable of per-output-layer detection arrays.
      """
      frameHeight = frame.shape[0]
      frameWidth = frame.shape[1]

      # Scan through all the bounding boxes output from the network and keep
      # only the ones with high confidence scores. Assign the box's class
      # label as the class with the highest score.
      classIds = []
      confidences = []
      boxes = []
      for out in outs:
          for detection in out:
              scores = detection[5:]
              classId = np.argmax(scores)
              confidence = scores[classId]
              if confidence > confThreshold:
                  # Debug output: all per-class scores of the kept detection.
                  print(scores)
                  center_x = int(detection[0] * frameWidth)
                  center_y = int(detection[1] * frameHeight)
                  width = int(detection[2] * frameWidth)
                  height = int(detection[3] * frameHeight)
                  left = int(center_x - width / 2)
                  top = int(center_y - height / 2)
                  classIds.append(classId)
                  confidences.append(float(confidence))
                  boxes.append([left, top, width, height])

      # Perform non maximum suppression to eliminate redundant overlapping
      # boxes with lower confidences.
      indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
      # NMSBoxes may return nested (N, 1) indices, flat (N,) indices, or an
      # empty tuple; flattening handles all three, whereas ``i[0]`` breaks on
      # the flat form.
      for i in np.asarray(indices).flatten():
          i = int(i)
          left, top, width, height = boxes[i]
          drawPred(classIds[i], confidences[i], left, top, left + width, top + height)

# Process inputs
# Window used to display every processed frame.
winName = 'Deep learning object detection in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

# Select the input (image, video file, or camera 0) and the output file name.
outputFile = "yolo_out_py.avi"
if args.image:
    # Open the image file
    if not os.path.isfile(args.image):
        print("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.image)
    # splitext drops the extension whatever its length (".jpg", ".jpeg", ...).
    outputFile = os.path.splitext(args.image)[0] + '_yolo_out_py.jpg'
elif args.video:
    # Open the video file
    if not os.path.isfile(args.video):
        print("Input video file ", args.video, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.video)
    outputFile = os.path.splitext(args.video)[0] + '_yolo_out_py.avi'
else:
    # No file given: read from capture device 0.
    cap = cv.VideoCapture(0)

# A video writer is only needed when the input is not a single image.
if not args.image:
    vid_writer = cv.VideoWriter(
        outputFile,
        cv.VideoWriter_fourcc('M', 'J', 'P', 'G'),
        5,
        (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)), round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))),
    )

# Process frames until a key is pressed or the input is exhausted.
while cv.waitKey(1) < 0:

    hasFrame, frame = cap.read()

    if not hasFrame:
        print("Done processing !!!")
        print("Output file is stored as ", outputFile)
        cv.waitKey(3000)
        break

    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)

    # Sets the input to the network
    net.setInput(blob)

    # Runs the forward pass to get output of the output layers
    outs = net.forward(getOutputsNames(net))

    # Remove the bounding boxes with low confidence
    postprocess(frame, outs)

    # Put efficiency information. The function getPerfProfile returns the
    # overall time for inference (t) and the timings for each of the layers
    # (in layersTimes).
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
    cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

    # Write the annotated frame: a single image file or the next video frame.
    if args.image:
        cv.imwrite(outputFile, frame.astype(np.uint8))
    else:
        vid_writer.write(frame.astype(np.uint8))

    cv.imshow(winName, frame)

# Release the capture device and, if one was opened, the video writer, so the
# output file is finalized on both exit paths (end of input or key press).
cap.release()
if not args.image:
    vid_writer.release()
click to hide/show revision 4
retagged

Yolov3 and darknet problem

I developed a custom object detector using Tiny YOLO and Darknet. It works great, but I need one specific feature:
Each bounding box output by the network is represented by a vector of (number of classes + 5) elements. The first 4 elements represent center_x, center_y, width and height. The fifth element represents the confidence that the bounding box encloses an object. The rest of the elements are the confidences associated with each class (i.e. object type). For each box, I need the confidence associated with every class, but the output contains only the maximum confidence; the other confidences are 0.

Example run :

print(scores)

returned

[0.        0.        0.5874982]

0.5874982 is the max confidence; it belongs to the 3rd class. But I don't understand why the other confidences are 0. Thanks for any reply, and I'm sorry for my bad English. This is the code:

 import cv2 as cv
 import argparse
 import sys
 import numpy as np
 import os.path

 # Detection thresholds and network input size.
 confThreshold = 0.5     # Minimum class score for a detection to be kept
 nmsThreshold = 0.6      # Threshold handed to non-maximum suppression
 inpWidth = 416          # Width of network's input image
 inpHeight = 416         # Height of network's input image

 # Command-line options: an image path, a video path, or neither.
 parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
 parser.add_argument('--image', help='Path to image file.')
 parser.add_argument('--video', help='Path to video file.')
 args = parser.parse_args()

 # Load names of classes, one name per line of the file.
 classesFile = "obj.names"
 with open(classesFile, 'rt') as f:
     classes = f.read().rstrip('\n').split('\n')

 # Give the configuration and weight files for the model and load the network using them.
 modelConfiguration = "yolov3-tiny-obj.cfg"
 modelWeights = "pesi/pesi_3_classi_new/yolov3-tiny-obj_7050.weights"

 net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
 net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
 net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

 # Get the names of the output layers
 def getOutputsNames(net):
     """Return the names of the network's unconnected output layers.

     ``net.getUnconnectedOutLayers()`` yields 1-based layer indices. Depending
     on the OpenCV version they arrive either nested, shape (N, 1), or flat,
     shape (N,). The result is flattened first so both forms work; indexing
     every entry with ``[0]`` fails on the flat form.

     Args:
         net: object exposing ``getLayerNames()`` and
             ``getUnconnectedOutLayers()``.

     Returns:
         List of layer names, in the order the indices were reported.
     """
     layersNames = net.getLayerNames()
     outLayers = np.asarray(net.getUnconnectedOutLayers()).flatten()
     # Indices are 1-based, the names sequence is 0-based.
     return [layersNames[int(i) - 1] for i in outLayers]

 # Draw the predicted bounding box
 def drawPred(classId, conf, left, top, right, bottom):
     """Draw one detection on the module-level ``frame``.

     A rectangle is drawn for class ids 0, 1 and 2 (ids 0 and 2 share a
     colour); any other id gets no rectangle. A caption with the confidence,
     prefixed by the class name when ``classes`` is non-empty, is then
     rendered on a white background at the box's top-left corner.
     """
     # Pick the box colour; None means "do not draw a box".
     if classId == 1:
         boxColor = (3, 14, 186)
     elif classId in (0, 2):
         boxColor = (40, 198, 31)
     else:
         boxColor = None
     if boxColor is not None:
         cv.rectangle(frame, (left, top), (right, bottom), boxColor, 3)

     # Build the caption: "<class name>:<confidence>" or just the confidence.
     caption = '%.2f' % conf
     if classes:
         assert(classId < len(classes))
         caption = '%s:%s' % (classes[classId], caption)

     # Measure the caption and keep it inside the image at the top edge.
     (textWidth, textHeight), baseLine = cv.getTextSize(caption, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
     top = max(top, textHeight)
     backgroundFrom = (left, top - round(textHeight))
     backgroundTo = (left + round(textWidth), top + baseLine)
     cv.rectangle(frame, backgroundFrom, backgroundTo, (255, 255, 255), cv.FILLED)
     cv.putText(frame, caption, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.45, (0,0,0), 1)

  # Remove the bounding boxes with low confidence using non-maxima suppression
  def postprocess(frame, outs):
      """Filter raw detections, run non-maximum suppression and draw the result.

      Each detection row is read as ``[center_x, center_y, width, height, _,
      score_0, score_1, ...]``; the first four values are scaled by the frame
      size. A detection is kept when its best class score exceeds
      ``confThreshold``. The kept boxes go through ``cv.dnn.NMSBoxes`` and
      the survivors are handed to ``drawPred``.

      Args:
          frame: image array; only ``frame.shape[0]`` (height) and
              ``frame.shape[1]`` (width) are read here.
          outs: iterable of per-output-layer detection arrays.
      """
      frameHeight = frame.shape[0]
      frameWidth = frame.shape[1]

      # Scan through all the bounding boxes output from the network and keep
      # only the ones with high confidence scores. Assign the box's class
      # label as the class with the highest score.
      classIds = []
      confidences = []
      boxes = []
      for out in outs:
          for detection in out:
              scores = detection[5:]
              classId = np.argmax(scores)
              confidence = scores[classId]
              if confidence > confThreshold:
                  # Debug output: all per-class scores of the kept detection.
                  print(scores)
                  center_x = int(detection[0] * frameWidth)
                  center_y = int(detection[1] * frameHeight)
                  width = int(detection[2] * frameWidth)
                  height = int(detection[3] * frameHeight)
                  left = int(center_x - width / 2)
                  top = int(center_y - height / 2)
                  classIds.append(classId)
                  confidences.append(float(confidence))
                  boxes.append([left, top, width, height])

      # Perform non maximum suppression to eliminate redundant overlapping
      # boxes with lower confidences.
      indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
      # NMSBoxes may return nested (N, 1) indices, flat (N,) indices, or an
      # empty tuple; flattening handles all three, whereas ``i[0]`` breaks on
      # the flat form.
      for i in np.asarray(indices).flatten():
          i = int(i)
          left, top, width, height = boxes[i]
          drawPred(classIds[i], confidences[i], left, top, left + width, top + height)

# Process inputs
# Window used to display every processed frame.
winName = 'Deep learning object detection in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

# Select the input (image, video file, or camera 0) and the output file name.
outputFile = "yolo_out_py.avi"
if args.image:
    # Open the image file
    if not os.path.isfile(args.image):
        print("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.image)
    # splitext drops the extension whatever its length (".jpg", ".jpeg", ...).
    outputFile = os.path.splitext(args.image)[0] + '_yolo_out_py.jpg'
elif args.video:
    # Open the video file
    if not os.path.isfile(args.video):
        print("Input video file ", args.video, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.video)
    outputFile = os.path.splitext(args.video)[0] + '_yolo_out_py.avi'
else:
    # No file given: read from capture device 0.
    cap = cv.VideoCapture(0)

# A video writer is only needed when the input is not a single image.
if not args.image:
    vid_writer = cv.VideoWriter(
        outputFile,
        cv.VideoWriter_fourcc('M', 'J', 'P', 'G'),
        5,
        (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)), round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))),
    )

# Process frames until a key is pressed or the input is exhausted.
while cv.waitKey(1) < 0:

    hasFrame, frame = cap.read()

    if not hasFrame:
        print("Done processing !!!")
        print("Output file is stored as ", outputFile)
        cv.waitKey(3000)
        break

    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)

    # Sets the input to the network
    net.setInput(blob)

    # Runs the forward pass to get output of the output layers
    outs = net.forward(getOutputsNames(net))

    # Remove the bounding boxes with low confidence
    postprocess(frame, outs)

    # Put efficiency information. The function getPerfProfile returns the
    # overall time for inference (t) and the timings for each of the layers
    # (in layersTimes).
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
    cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

    # Write the annotated frame: a single image file or the next video frame.
    if args.image:
        cv.imwrite(outputFile, frame.astype(np.uint8))
    else:
        vid_writer.write(frame.astype(np.uint8))

    cv.imshow(winName, frame)

# Release the capture device and, if one was opened, the video writer, so the
# output file is finalized on both exit paths (end of input or key press).
cap.release()
if not args.image:
    vid_writer.release()