Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

Extracting information from national id

I'm trying to do OCR arabic on the following ID but I get a very noisy picture, and can't extract information from it.

Here is my attempt

import tesserocr
from PIL import Image
import pytesseract
import matplotlib as plt
import cv2
import imutils
import numpy as np

image = cv2.imread(r'c:\ahmed\ahmed.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray,11,18,18)

gray = cv2.GaussianBlur(gray,(5,5), 0)

kernel = np.ones((2,2), np.uint8)


gray = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,11,2)
#img_dilation = cv2.erode(gray, kernel, iterations=1)


#cv2.imshow("dilation", img_dilation)

cv2.imshow("gray", gray)

text = pytesseract.image_to_string(gray, lang='ara')
print(text)
with open(r"c:\ahmed\file.txt", "w", encoding="utf-8") as myfile:
    myfile.write(text)
cv2.waitKey(0)

image description image description

Extracting information from national id

I'm trying to do OCR arabic on the following ID but I get a very noisy picture, and can't extract information from it.

Here is my attempt

import tesserocr
from PIL import Image
import pytesseract
import matplotlib as plt
import cv2
import imutils
import numpy as np

image = cv2.imread(r'c:\ahmed\ahmed.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray,11,18,18)

gray = cv2.GaussianBlur(gray,(5,5), 0)

kernel = np.ones((2,2), np.uint8)


gray = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,11,2)
#img_dilation = cv2.erode(gray, kernel, iterations=1)


#cv2.imshow("dilation", img_dilation)

cv2.imshow("gray", gray)

text = pytesseract.image_to_string(gray, lang='ara')
print(text)
with open(r"c:\ahmed\file.txt", "w", encoding="utf-8") as myfile:
    myfile.write(text)
cv2.waitKey(0)

another sample

image description

image description image description