To display the class name of a detected object, I need the class_id of each detection. The problem is that np.argmax always returns 0, so I always get the first class name. When a different object is detected it should print class_id 1, but it still prints 0, so I can't display the correct label.
When I look at my .txt files, I see this:
0 0.170103 0.449807 0.319588 0.521236
1 0.266791 0.148936 0.496269 0.287234
2 0.265464 0.422780 0.510309 0.420849
def detect_img(self, img, trust_threshold=0.2):
    """Run the network on ``img`` and draw a labelled box per detection.

    The image is converted to a 416x416 blob, passed through ``self.net``
    and every row of every output layer named in ``self.output`` is
    inspected. Rows whose best class score exceeds ``trust_threshold`` are
    drawn onto ``img`` in place together with their label from
    ``self.classes``.

    Args:
        img: Image array; only its first two dimensions (height, width)
            are read here. Modified in place by the drawing calls.
        trust_threshold: Minimum class score a detection needs in order
            to be drawn. Defaults to 0.2.

    Returns:
        None. The result is the annotation drawn onto ``img``.
    """
    # 0.00392 is approximately 1/255; True is passed positionally as the
    # fifth argument (swapRB in the OpenCV signature).
    blob = cv2.dnn.blobFromImage(
        img, 0.00392, (416, 416), (0, 0, 0), True, crop=False
    )
    # The result of setInput was bound to a local that was never used.
    self.net.setInput(blob)
    layer_outputs = self.net.forward(self.output)

    # Only height and width are needed; slicing also avoids an unpacking
    # error when the image has no channel axis.
    height, width = img.shape[:2]

    boxes = []
    trusts = []
    class_ids = []
    for out in layer_outputs:
        for detect in out:
            # The class index must come from the class scores only
            # (detect[5:]). An argmax over the whole row would also
            # compare the box fields at the front of the row and so
            # would not be a class index.
            # NOTE(review): detect[4] is skipped; presumably the
            # objectness score of a YOLO-style output -- confirm.
            total_scores = detect[5:]
            class_id = int(np.argmax(total_scores))
            trust_factor = float(total_scores[class_id])
            if trust_factor > trust_threshold:
                # detect[0:4] are scaled by the image size below, i.e.
                # they are treated as normalised centre/size values.
                x_center = int(detect[0] * width)
                y_center = int(detect[1] * height)
                w = int(detect[2] * width)
                h = int(detect[3] * height)
                # Convert the centre point to the top-left corner. The
                # vertical coordinate must be derived from y_center
                # (it was previously derived from x_center).
                x = int(x_center - w / 2)
                y = int(y_center - h / 2)
                boxes.append([x, y, w, h])
                trusts.append(trust_factor)
                class_ids.append(class_id)

    # NOTE: no non-maximum suppression is applied, so every candidate
    # above the threshold is drawn, including overlapping ones.
    for (x, y, w, h), trust_value, class_id in zip(boxes, trusts, class_ids):
        label = self.classes[class_id]
        trust = round(trust_value, 2)
        text = f"{label}, Trust: {trust}"
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(
            img, text, (x - 20, y + 40), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 2
        )