I have made a simple code to recognize cars in a highway. The prediction works Ok, however the challenge comes when I would like to track them using the Multitrack method in OpenCV.
The problem is, in each frame the cascade recognizes say, 3 cars. These 3 cars feed the multiobject track, so for every next frame there would be tracked. However the cascade keep recognizing cars that goes into the tracking. Of course a car is the same, so it doesn't need to be recognized again and again.
I am using the video from here - so you can download it and rename it to uk_road.avi.
Here is the image of the result - which you can see multiple boxes are drawn because the multi-object and the cascade are not synchronized.
I am using python 3.6.6 and opencv version 3.4.2
#! /usr/bin/python
import cv2
import numpy as np
import imutils
def diffUpDown(img):
# compare top and bottom size of the image
# 1. cut image in two
# 2. flip the top side
# 3. resize to same size
# 4. compare difference
height, width, depth = img.shape
half = int(height/2)
top = img[0:half, 0:width]
bottom = img[half:half+half, 0:width]
top = cv2.flip(top,1)
bottom = cv2.resize(bottom, (32, 64))
top = cv2.resize(top, (32, 64))
return ( mse(top,bottom) )
def diffLeftRight(img):
# compare left and right size of the image
# 1. cut image in two
# 2. flip the right side
# 3. resize to same size
# 4. compare difference
height, width, depth = img.shape
half = int(width/2)
left = img[0:height, 0:half]
right = img[0:height, half:half + half-1]
right = cv2.flip(right,1)
left = cv2.resize(left, (32, 64))
right = cv2.resize(right, (32, 64))
return ( mse(left,right) )
def mse(imageA, imageB):
err = np.sum((imageA.astype("float") - imageB.astype("float")) ** 2)
err /= float(imageA.shape[0] * imageA.shape[1])
return err
def isNewRoi(rx,ry,rw,rh,rectangles):
for r in rectangles:
if abs(r[0] - rx) < 30 and abs(r[1] - ry) < 30:
return False
return True
def detectRegionsOfInterest(frame, cascade):
scaleDown = 2
frameHeight, frameWidth, fdepth = frame.shape
# Resize
frame = cv2.resize(frame, (int(frameWidth/scaleDown), int(frameHeight/scaleDown)))
frameHeight, frameWidth, fdepth = frame.shape
# haar detection.
cars = cascade.detectMultiScale(frame, 2, 1)
newRegions = []
minY = int(frameHeight*0.1)
# iterate regions of interest
for (x,y,w,h) in cars:
roi = [x,y,w,h]
roiImage = frame[y:y+h, x:x+w]
if y > minY:
diffX = diffLeftRight(roiImage)
diffY = round(diffUpDown(roiImage))
if diffX > 200 and diffY > 1200 :
rx,ry,rw,rh = roi
newRegions.append( [rx*scaleDown,ry*scaleDown,rw*scaleDown,rh*scaleDown] )
return newRegions
def detectCars(filename):
trackers = cv2.MultiTracker_create()
rectangles = []
cascade = cv2.CascadeClassifier('cars.xml')
vc = cv2.VideoCapture(filename)
if vc.isOpened():
rval , frame = vc.read()
else:
rval = False
frameCount = 0
while frameCount < 40 :
rval, frame = vc.read()
frame = imutils.resize(frame, width=600)
frameHeight, frameWidth, fdepth = frame.shape
newRegions = detectRegionsOfInterest(frame, cascade)
# for region in newRegions:
# if isNewRoi(region[0],region[1],region[2],region[3],rectangles):
# rectangles.append(region)
# tracker = cv2.TrackerKCF_create()
# trackers.add(tracker, frame, tuple(region))
tracker = cv2.TrackerKCF_create()
for region in newRegions:
trackers.add(tracker, frame, tuple(region))
ok, trackbox = trackers.update(frame)
#for r in rectangles:
#cv2.rectangle(frame,(r[0],r[1]),(r[0]+r[2],r[1]+r[3]),(0,0,255),3)
for newbox in trackbox:
p1 = (int(newbox[0]), int(newbox[1]))
p2 = (int(newbox[0] + newbox[2]), int(newbox[1] + newbox[3]))
cv2.rectangle(frame, p1, p2, (200,0,0))
# frameCount = frameCount + 1
# if frameCount > 5:
# frameCount = 0
# rectangles = []
# show result
# cv2.line(frame, (750, 400), (1070, 400), (0,255,0), 14)
cv2.imshow("tracking",frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cv2.waitKey(1);
print(trackers)
print(type(trackers))
frameCount = frameCount + 1
vc.release()
cv2.destroyAllWindows() # destroy all the opened windows
detectCars('uk_road.avi')