Revision history [back]

Extracting information from national id

I'm trying to do OCR arabic on the following ID but I get a very noisy picture, and can't extract information from it.

Here is my attempt

import tesserocr
from PIL import Image
import pytesseract
import matplotlib as plt
import cv2
import imutils
import numpy as np

image = cv2.imread(r'c:\ahmed\ahmed.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray,11,18,18)

gray = cv2.GaussianBlur(gray,(5,5), 0)

kernel = np.ones((2,2), np.uint8)


gray = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,11,2)
#img_dilation = cv2.erode(gray, kernel, iterations=1)


#cv2.imshow("dilation", img_dilation)

cv2.imshow("gray", gray)

text = pytesseract.image_to_string(gray, lang='ara')
print(text)
with open(r"c:\ahmed\file.txt", "w", encoding="utf-8") as myfile:
    myfile.write(text)
cv2.waitKey(0)

image description

Extracting information from national id

I'm trying to do OCR arabic on the following ID but I get a very noisy picture, and can't extract information from it.

Here is my attempt

import tesserocr
from PIL import Image
import pytesseract
import matplotlib as plt
import cv2
import imutils
import numpy as np

image = cv2.imread(r'c:\ahmed\ahmed.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray,11,18,18)

gray = cv2.GaussianBlur(gray,(5,5), 0)

kernel = np.ones((2,2), np.uint8)


gray = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,11,2)
#img_dilation = cv2.erode(gray, kernel, iterations=1)


#cv2.imshow("dilation", img_dilation)

cv2.imshow("gray", gray)

text = pytesseract.image_to_string(gray, lang='ara')
print(text)
with open(r"c:\ahmed\file.txt", "w", encoding="utf-8") as myfile:
    myfile.write(text)
cv2.waitKey(0)

another sample

image description