Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

np.argmax returns 0 always

To determine the class name of the detected object, I need to get the class_id of the image. The problem is, np.argmax always returns 0 and gets the first class name. When I detect another object, it should print class_id 1 but it prints 0 and I can't get the proper label name to display.

When I look at my .txt files, I see this:

0 0.170103 0.449807 0.319588 0.521236

1 0.266791 0.148936 0.496269 0.287234

2 0.265464 0.422780 0.510309 0.420849

def detect_img(self, img):
    blob = cv2.dnn.blobFromImage(img, 0.00392 ,(416,416), (0,0,0), True, crop=False)
    input_img = self.net.setInput(blob)
    output = self.net.forward(self.output)

    height, width, channel = img.shape
    boxes = []
    trusts = []
    class_ids = []

    for out in output:
        for detect in out:
            total_scores = detect[5:]
            class_id = np.argmax(total_scores)
            print(np.argmax(detect))
            trust_factor = total_scores[class_id]
            if trust_factor > 0.2:
                x_center = int(detect[0] * width)
                y_center = int(detect[1] * height)
                w = int(detect[2] * width)
                h = int(detect[3] * height)
                x = int(x_center - w / 2)
                y = int(x_center - h / 2)
                boxes.append([x,y,w,h])
                trusts.append(float(trust_factor))
                class_ids.append(class_id)

    for index in range(len(boxes)):
        # if index in indexes:
        x,y,w,h = boxes[index]
        label = self.classes[class_ids[index]]
        trust = round(trusts[index], 2)
        text = f"{label}, Trust: {trust}"
        cv2.rectangle(img, (x,y), (x + w, y + h), (0,255,0), 2)
        cv2.putText(img, text, (x - 20, y + 40), cv2.FONT_HERSHEY_PLAIN, 1, (0,0,255), 2)
click to hide/show revision 2
retagged

updated 2020-05-16 08:01:56 -0600

berak gravatar image

np.argmax returns 0 always

To determine the class name of the detected object, I need to get the class_id of the image. The problem is, np.argmax always returns 0 and gets the first class name. When I detect another object, it should print class_id 1 but it prints 0 and I can't get the proper label name to display.

When I look at my .txt files, I see this:

0 0.170103 0.449807 0.319588 0.521236

1 0.266791 0.148936 0.496269 0.287234

2 0.265464 0.422780 0.510309 0.420849

def detect_img(self, img):
    blob = cv2.dnn.blobFromImage(img, 0.00392 ,(416,416), (0,0,0), True, crop=False)
    input_img = self.net.setInput(blob)
    output = self.net.forward(self.output)

    height, width, channel = img.shape
    boxes = []
    trusts = []
    class_ids = []

    for out in output:
        for detect in out:
            total_scores = detect[5:]
            class_id = np.argmax(total_scores)
            print(np.argmax(detect))
            trust_factor = total_scores[class_id]
            if trust_factor > 0.2:
                x_center = int(detect[0] * width)
                y_center = int(detect[1] * height)
                w = int(detect[2] * width)
                h = int(detect[3] * height)
                x = int(x_center - w / 2)
                y = int(x_center - h / 2)
                boxes.append([x,y,w,h])
                trusts.append(float(trust_factor))
                class_ids.append(class_id)

    for index in range(len(boxes)):
        # if index in indexes:
        x,y,w,h = boxes[index]
        label = self.classes[class_ids[index]]
        trust = round(trusts[index], 2)
        text = f"{label}, Trust: {trust}"
        cv2.rectangle(img, (x,y), (x + w, y + h), (0,255,0), 2)
        cv2.putText(img, text, (x - 20, y + 40), cv2.FONT_HERSHEY_PLAIN, 1, (0,0,255), 2)

np.argmax returns 0 always

To determine the class name of the detected object, I need to get the class_id of the image. The problem is, np.argmax always returns 0 and gets the first class name. When I detect another object, it should print class_id 1 but it prints 0 and I can't get the proper label name to display.

When I look at my .txt files, I see this:

0 0.170103 0.449807 0.319588 0.521236

1 0.266791 0.148936 0.496269 0.287234

2 0.265464 0.422780 0.510309 0.420849

def detect_img(self, img):
    blob = cv2.dnn.blobFromImage(img, 0.00392 ,(416,416), (0,0,0), True, crop=False)
    input_img = self.net.setInput(blob)
    output = self.net.forward(self.output)

    height, width, channel = img.shape
    boxes = []
    trusts = []
    class_ids = []

    for out in output:
        for detect in out:
            total_scores = detect[5:]
            class_id = np.argmax(total_scores)
            print(np.argmax(detect))
            trust_factor = total_scores[class_id]
            if trust_factor > 0.2:
                x_center = int(detect[0] * width)
                y_center = int(detect[1] * height)
                w = int(detect[2] * width)
                h = int(detect[3] * height)
                x = int(x_center - w / 2)
                y = int(x_center - h / 2)
                boxes.append([x,y,w,h])
                trusts.append(float(trust_factor))
                class_ids.append(class_id)

    for index in range(len(boxes)):
        # if index in indexes:
        x,y,w,h = boxes[index]
        label = self.classes[class_ids[index]]
        trust = round(trusts[index], 2)
        text = f"{label}, Trust: {trust}"
        cv2.rectangle(img, (x,y), (x + w, y + h), (0,255,0), 2)
        cv2.putText(img, text, (x - 20, y + 40), cv2.FONT_HERSHEY_PLAIN, 1, (0,0,255), 2)

How I labeled my image: This is how I labeled the images

The result after training: This is the result that i get

In the result you can see that I dont see the label of the iphone and the rectangle is way above than I have selected. The airpods are also way bigger and samsung is at the bottom, which I didn't select like this.