Ask Your Question

Revision history [back]

Why pytesseract.image_to_data fails to recognize text?

Hi! I am new to opencv,I am working on a project trying to recognize traffic signs.The enviroment I am going to use this project is indoors, it is for a self-driving small car which will have to navigate around a track. I've decided to first rescognize the shape of the object, then create a new picture from the ROI, and try to recognize the text on that picture.I am testing it with my laptop's webcamera which captures a 640x480 frame. The first issue i have is that picture that my code is trying to find the contours on is a little bit noisy. I tried several approaches, right now I am using median blurring--> Canny edge detection -->dialation. Is there something I am misssing? It provides relatively fair results on finding shapes, but it doesn't always work as inteded.(finds non positive results) Furthermore, the text recoginition part is also working but a fairly high fail rate. So it sometimes founds absolutely nonsense words, but it sometimes founds 'STOP'. I've also tought i could is pytesseract.image_to_string or image_to_boxes. Would any of that be a better approach? Here is my code: import cv2 import pytesseract import numpy as np font = cv2.FONT_HERSHEY_COMPLEX def empty(x): print(x) pass

text recognition

def find_text(img): pytesseract.pytesseract.tesseract_cmd = 'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe' ##strings to find stop = "STOP"

############################

img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

img=cv2.resize(img,(300,150))

##word recognition
hImg, wImg, _ = img.shape
boxes = pytesseract.image_to_data(img)
print(boxes)

for x, b in enumerate(boxes.splitlines()):

    if x != 0:  ##first row headings
        b = b.split()  ##strings
        print(b)
        x1, y1, w1, h1 = int(b[6]), int(b[7]), int(b[8]), int(b[9])
        cv2.rectangle(img, (x1, y1), (w1 + x1, h1 + y1), (0, 0, 255), 3)
        cv2.imshow("roi", img)
        if len(b) == 12:
            if b[11] == stop:
                print("found it")
#

parameter window

cv2.namedWindow("parameters") cv2.resizeWindow("paramaters",640,240)

TRACKBARS

cv2.createTrackbar("threshold1", "parameters", 81, 255, empty) cv2.createTrackbar("threshold2", "parameters", 255, 255, empty)

#

kernel = np.ones((3, 3), np.uint8) ## maszk cap = cv2.VideoCapture(0)

while (cap.isOpened()): ret,frame = cap.read() if ret == True: ##beolvasott video értékeinek felvétele width=cap.get(cv2.CAP_PROP_FRAME_WIDTH) height=cap.get(cv2.CAP_PROP_FRAME_HEIGHT) fps=cap.get(cv2.CAP_PROP_FPS) imgcopy=frame ##másolat készítése

    ##GET TRACKBAR POS##

    threshold1 = cv2.getTrackbarPos('threshold1', "parameters")
    threshold2 = cv2.getTrackbarPos('threshold1', "parameters")



    grayvideo = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    blur =cv2.bilateralFilter(grayvideo,7,200,200)
    blur2 = cv2.medianBlur(grayvideo, 11)
    #bilblur=cv2.bilateralFilter(grayvideo,7,50,50)

    imgCanny = cv2.Canny(blur2, threshold1, threshold2)
    #ret3,th3 = cv2.threshold(grayvideo,threshold1,threshold2,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    cv2.imshow("Canny", imgCanny)
    blur2 = cv2.medianBlur(imgCanny,1)
    cv2.imshow("blur2", blur2)

    dilated = cv2.dilate(imgCanny,kernel,iterations=1)
    cv2.imshow("dilated", dilated)


    contours,hierarchy =cv2.findContours(dilated,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)

        #print(len(contours))
        ##area of conotur and boundig rect
    for contour in contours:  ##area of conotur and boundig rect
            area=cv2.contourArea(contour)
            if area>5000:

                    peri = cv2.arcLength(contour, True)

                    approx = cv2.approxPolyDP(contour, 0.01 * peri, True)

                    objCor = len(approx)
                    x, y, w, h = cv2.boundingRect(approx)
                    if objCor == 8:
                            Type = "octa"
                            roi = imgcopy[y:y + h, x:x + w]
                            find_text(roi)
                            cv2.imwrite("roi.png", roi)

                    else:
                            Type = "None"
                    cv2.drawContours(frame, contours, -1, (0, 255, 0), 2)
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 1)
                    cv2.putText(frame, Type, (x, y), font, 0.5,
                                (0, 255, 255), 2)

            ###
            #cv2.drawContours(img,contours,-1,(0,255,0),2)




            cv2.imshow("frame",frame)

    if cv2.waitKey(1) & 0xFF == ord('q'): break

cv2.waitKey(0)

cv2.destroyAllWindows() cap.release()

Is there any way to improve detection success? Thank you in advance :)

Why pytesseract.image_to_data fails to recognize text?

Hi! I am new to opencv,I am working on a project trying to recognize traffic signs.The enviroment I am going to use this project is indoors, it is for a self-driving small car which will have to navigate around a track. I've decided to first rescognize the shape of the object, then create a new picture from the ROI, and try to recognize the text on that picture.I am testing it with my laptop's webcamera which captures a 640x480 frame. The first issue i have is that picture that my code is trying to find the contours on is a little bit noisy. I tried several approaches, right now I am using median blurring--> Canny edge detection -->dialation. Is there something I am misssing? It provides relatively fair results on finding shapes, but it doesn't always work as inteded.(finds non positive results) Furthermore, the text recoginition part is also working but a fairly high fail rate. So it sometimes founds absolutely nonsense words, but it sometimes founds 'STOP'. I've also tought i could is pytesseract.image_to_string or image_to_boxes. Would any of that be a better approach? Here is my code: code:

import cv2
import pytesseract
import numpy as np
font = cv2.FONT_HERSHEY_COMPLEX
def empty(x):
        print(x)
        pass

text recognition

pass #text recognition def find_text(img): pytesseract.pytesseract.tesseract_cmd = 'C:\Program 'C:\\Program Files (x86)\Tesseract-OCR\tesseract.exe' (x86)\\Tesseract-OCR\\tesseract.exe' ##strings to find stop = "STOP"

"STOP"

    ############################

 img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

 img=cv2.resize(img,(300,150))

 ##word recognition
 hImg, wImg, _ = img.shape
 boxes = pytesseract.image_to_data(img)
 print(boxes)

 for x, b in enumerate(boxes.splitlines()):
      if x != 0:  ##first row headings
         b = b.split()  ##strings
         print(b)
         x1, y1, w1, h1 = int(b[6]), int(b[7]), int(b[8]), int(b[9])
         cv2.rectangle(img, (x1, y1), (w1 + x1, h1 + y1), (0, 0, 255), 3)
         cv2.imshow("roi", img)
         if len(b) == 12:
             if b[11] == stop:
                 print("found it")
#

parameter window

#parameter window cv2.namedWindow("parameters") cv2.resizeWindow("paramaters",640,240)

TRACKBARS

cv2.resizeWindow("paramaters",640,240) #TRACKBARS cv2.createTrackbar("threshold1", "parameters", 81, 255, empty) cv2.createTrackbar("threshold2", "parameters", 255, 255, empty)

#

empty) kernel = np.ones((3, 3), np.uint8) ## maszk cap = cv2.VideoCapture(0)

cv2.VideoCapture(0) while (cap.isOpened()): ret,frame = cap.read() if ret == True: ##beolvasott video értékeinek felvétele width=cap.get(cv2.CAP_PROP_FRAME_WIDTH) height=cap.get(cv2.CAP_PROP_FRAME_HEIGHT) fps=cap.get(cv2.CAP_PROP_FPS) imgcopy=frame ##másolat készítése

    ##GET készítése

        #GET TRACKBAR POS##

POS
        threshold1 = cv2.getTrackbarPos('threshold1', "parameters")
     threshold2 = cv2.getTrackbarPos('threshold1', "parameters")

      grayvideo = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
     blur =cv2.bilateralFilter(grayvideo,7,200,200)
     blur2 = cv2.medianBlur(grayvideo, 11)
     #bilblur=cv2.bilateralFilter(grayvideo,7,50,50)

     imgCanny = cv2.Canny(blur2, threshold1, threshold2)
     #ret3,th3 = cv2.threshold(grayvideo,threshold1,threshold2,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
     cv2.imshow("Canny", imgCanny)
     blur2 = cv2.medianBlur(imgCanny,1)
     cv2.imshow("blur2", blur2)

     dilated = cv2.dilate(imgCanny,kernel,iterations=1)
     cv2.imshow("dilated", dilated)


     contours,hierarchy =cv2.findContours(dilated,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)

         #print(len(contours))
            #area of conotur and boundig rect
        for contour in contours:  ##area of conotur and boundig rect
    for contour in contours:  ##area of conotur and boundig rect
            area=cv2.contourArea(contour)
             if area>5000:

                     peri = cv2.arcLength(contour, True)

                     approx = cv2.approxPolyDP(contour, 0.01 * peri, True)

                     objCor = len(approx)
                     x, y, w, h = cv2.boundingRect(approx)
                     if objCor == 8:
                             Type = "octa"
                             roi = imgcopy[y:y + h, x:x + w]
                             find_text(roi)
                             cv2.imwrite("roi.png", roi)

                     else:
                             Type = "None"
                     cv2.drawContours(frame, contours, -1, (0, 255, 0), 2)
                     cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 1)
                     cv2.putText(frame, Type, (x, y), font, 0.5,
                                 (0, 255, 255), 2)

            ###
            #cv2.drawContours(img,contours,-1,(0,255,0),2)
              cv2.imshow("frame",frame)

     if cv2.waitKey(1) & 0xFF == ord('q'): break

cv2.waitKey(0)
cv2.destroyAllWindows()
cap.release()

cv2.waitKey(0)

cv2.destroyAllWindows() cap.release()

Is there any way to improve detection success? Thank you in advance :)

Why pytesseract.image_to_data fails to recognize text?

Hi! I am new to OpenCV. I am working on a project trying to recognize traffic signs. The environment I am going to use this project in is indoors; it is for a small self-driving car which will have to navigate around a track. I've decided to first recognize the shape of the object, then create a new picture from the ROI, and try to recognize the text in that picture. I am testing it with my laptop's webcam, which captures a 640x480 frame. The first issue I have is that the picture my code is trying to find the contours on is a little bit noisy. I tried several approaches; right now I am using median blurring --> Canny edge detection --> dilation. Is there something I am missing? It provides relatively fair results on finding shapes, but it doesn't always work as intended (it finds false positives). Furthermore, the text recognition part is also working, but with a fairly high fail rate. So it sometimes finds absolutely nonsense words, but it sometimes finds 'STOP'. I've also thought I could use pytesseract.image_to_string or image_to_boxes. Would either of those be a better approach? Here is my code:

import cv2
import pytesseract
import numpy as np
font = cv2.FONT_HERSHEY_COMPLEX
def empty(x):
    """Trackbar callback: echo the new slider value (OpenCV requires a callback)."""
    print(x)
#text recognition
def find_text(img):
    """Run Tesseract OCR on a BGR image patch and report whether it reads "STOP".

    Draws the word bounding boxes returned by image_to_data onto a resized
    copy of the patch and shows it in the "roi" window; prints "found it"
    when the exact word STOP is recognized.
    """
    # BUG FIX: the original used a non-raw string, so '\tesseract.exe'
    # contained a literal TAB character ('\t' escape) and the path was wrong.
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'
    stop = "STOP"  # target word to detect

    # Tesseract expects RGB; OpenCV frames are BGR.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Upscale the (usually small) ROI so glyphs are large enough for OCR.
    img = cv2.resize(img, (300, 150))

    # image_to_data returns a TSV string: header row, then one row per box.
    boxes = pytesseract.image_to_data(img)
    print(boxes)

    for row_no, row in enumerate(boxes.splitlines()):
        if row_no == 0:  # skip the TSV header row
            continue
        fields = row.split()
        print(fields)
        # Columns 6-9 are left, top, width, height of the detected box.
        x1, y1, w1, h1 = int(fields[6]), int(fields[7]), int(fields[8]), int(fields[9])
        cv2.rectangle(img, (x1, y1), (w1 + x1, h1 + y1), (0, 0, 255), 3)
        cv2.imshow("roi", img)
        # Only rows that actually carry recognized text have all 12 columns
        # after split(); column 11 is the text itself.
        if len(fields) == 12 and fields[11] == stop:
            print("found it")

# Parameter window with trackbars for the two Canny thresholds.
cv2.namedWindow("parameters")
# BUG FIX: the window name was misspelled "paramaters", so the resize
# targeted a nonexistent window and did nothing.
cv2.resizeWindow("parameters", 640, 240)
cv2.createTrackbar("threshold1", "parameters", 81, 255, empty)
cv2.createTrackbar("threshold2", "parameters", 255, 255, empty)

kernel = np.ones((3, 3), np.uint8)  # 3x3 dilation mask
cap = cv2.VideoCapture(0)  # default webcam

# Main capture loop: grab frames, find large octagonal contours, and OCR them.
while cap.isOpened():
    ret, frame = cap.read()
    if ret:
        # Capture properties (currently informational only).
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        fps = cap.get(cv2.CAP_PROP_FPS)
        # BUG FIX: keep a real copy of the frame. The original `imgcopy = frame`
        # only aliased the same array, so the drawing below polluted the
        # pixels that the OCR ROI is cut from.
        imgcopy = frame.copy()

        # Read the Canny thresholds from the trackbars.
        threshold1 = cv2.getTrackbarPos('threshold1', "parameters")
        # BUG FIX: the original read 'threshold1' twice, so the second
        # slider had no effect.
        threshold2 = cv2.getTrackbarPos('threshold2', "parameters")

        # Preprocess: grayscale -> median blur -> Canny -> dilate.
        # (The original also computed an unused bilateralFilter result and a
        # medianBlur with ksize=1, which is an identity op — both removed.)
        grayvideo = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        blur2 = cv2.medianBlur(grayvideo, 11)

        imgCanny = cv2.Canny(blur2, threshold1, threshold2)
        cv2.imshow("Canny", imgCanny)

        dilated = cv2.dilate(imgCanny, kernel, iterations=1)
        cv2.imshow("dilated", dilated)

        contours, hierarchy = cv2.findContours(dilated, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

        for contour in contours:
            # Ignore small/noisy contours.
            area = cv2.contourArea(contour)
            if area > 5000:
                peri = cv2.arcLength(contour, True)
                approx = cv2.approxPolyDP(contour, 0.01 * peri, True)
                objCor = len(approx)
                x, y, w, h = cv2.boundingRect(approx)
                if objCor == 8:
                    # Eight vertices -> likely an octagon (stop sign):
                    # OCR the clean ROI and save it for inspection.
                    Type = "octa"
                    roi = imgcopy[y:y + h, x:x + w]
                    find_text(roi)
                    cv2.imwrite("roi.png", roi)
                else:
                    Type = "None"
                cv2.drawContours(frame, contours, -1, (0, 255, 0), 2)
                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 1)
                cv2.putText(frame, Type, (x, y), font, 0.5,
                            (0, 255, 255), 2)

        # BUG FIX: show the frame once per iteration, outside the contour
        # loop — the original only displayed it while contours existed.
        cv2.imshow("frame", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

Is there any way to improve detection success? Thank you in advance :)