Problems with contouring and binarization(thresholding) [closed]
My first time posting to this forum... I am trying to extract text from images using opencv 3.0.* and then feed it to the OCR(tesseract).
In the attached code there are 2 problems.
- From GrayScale to binarization(thresholding) I lose some of the text(like Uverse).
- On findingContours, the bacground and the text is all converted into black background.
Is there a better approach than what I am doing ? Tried Gaussian blur, made the picture too smooth . C:\fakepath\myImageContoured.jpg C:\fakepath\myGrayImageOpenCV.jpg C:\fakepath\ImageOTSU.jpg C:\fakepath\myImageDilated.jpg.
//original image - vip1200.jpg
//convert image to gray scale - result in myGrayImageOpenCV.jpg
byte[] data = ((DataBufferByte) image.getRaster().getDataBuffer()).getData();
Mat sourceMat = new Mat(image.getHeight(), image.getWidth(), CvType.CV_8UC3);
sourceMat.put(0, 0, data);
Mat greyScaledMat = new Mat(image.getHeight(),image.getWidth(),CvType.CV_8UC1);
Imgproc.cvtColor(sourceMat, greyScaledMat, Imgproc.COLOR_RGB2GRAY);
//Threshold the gray image - result in ImageOTSU.jpg
byte[] data1 = new byte[greyScaledMat.rows() * greyScaledMat.cols() * (int)(greyScaledMat.elemSize())];
greyScaledMat.get(0, 0, data1);
grayScaledBufImg = new BufferedImage(greyScaledMat.cols(),greyScaledMat.rows(), BufferedImage.TYPE_BYTE_GRAY);
grayScaledBufImg.getRaster().setDataElements(0, 0, greyScaledMat.cols(), greyScaledMat.rows(), data1);
Mat destinationMat = new Mat(greyScaledMat.rows(),greyScaledMat.cols(),greyScaledMat.type());
destinationMat = greyScaledMat;
//Imgproc.threshold(greyScaledMat,destinationMat,127,255,Imgproc.THRESH_TOZERO);
//Imgproc.threshold(greyScaledMat,destinationMat,127,255,Imgproc.THRESH_TOZERO_INV);
// Imgcodecs.imwrite("C:\\scme\\myImageThreshTOZERO_INV.jpg", destinationMat);
Imgproc.threshold(greyScaledMat, destinationMat, -1, 255, Imgproc.THRESH_BINARY_INV+Imgproc.THRESH_OTSU); //very good
//Print the above threshold OTSU image
Imgcodecs.imwrite("C:\\scme\\ImageOTSU.jpg", destinationMat);
//dilate the binarized image - results in myImageDilated.jpg
Imgproc.dilate(destinationMat, destinationMat, Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(2, 2)));
Imgcodecs.imwrite("C:\\scme\\myImageDilated.jpg", destinationMat);
//find contours to detect the text regions - result in myImageContoured.jpg
Mat heirarchy= new Mat();
Point shift=new Point(150,0);
List<MatOfPoint> contours = new ArrayList<MatOfPoint>();
Imgproc.findContours(destinationMat, contours, heirarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE);
double[] cont_area =new double[contours.size()];
for(int i=0; i< contours.size();i++){
if (Imgproc.contourArea(contours.get(i)) > 50 ){
Rect rect = Imgproc.boundingRect(contours.get(i));
cont_area[i]=Imgproc.contourArea(contours.get(i));
if (rect.height > 25){
Imgproc.rectangle(destinationMat, new Point(rect.x,rect.y), new Point(rect.x+rect.width,rect.y
+rect.height),new Scalar(0,0,255)); // System.out.println(rect.x +"-"+ rect.y +"-"+ rect.height+"-"+rect.width); } } }
Imgcodecs.imwrite("C:\\scme\\myImageContoured.jpg", destinationMat);