Ask Your Question

opencvNewbie's profile - activity

2015-01-29 12:57:11 -0600 asked a question Problems with contouring and binarization(thresholding)

This is my first time posting to this forum. I am trying to extract text from images using OpenCV 3.0.* and then feed it to an OCR engine (Tesseract).

In the attached code there are 2 problems.

  1. From GrayScale to binarization(thresholding) I lose some of the text(like Uverse).
  2. On findingContours, the background and the text are all converted into a black background.

Is there a better approach than what I am doing? I tried Gaussian blur, but it made the picture too smooth. C:\fakepath\myImageContoured.jpg C:\fakepath\myGrayImageOpenCV.jpg C:\fakepath\ImageOTSU.jpg C:\fakepath\myImageDilated.jpg

                 //original image - vip1200.jpg
                 //convert image to gray scale - result in myGrayImageOpenCV.jpg
         // Pipeline: colour image -> grey -> Otsu binarization -> dilation -> contour
         // search -> bounding boxes of text-sized regions drawn on a colour copy.
         //
         // Copy the BufferedImage pixels into an 8-bit, 3-channel Mat.
         // NOTE(review): assumes `image` is backed by a byte buffer holding 3 bytes per
         // pixel (e.g. TYPE_3BYTE_BGR) - confirm against the code that loads `image`.
         byte[] data = ((DataBufferByte) image.getRaster().getDataBuffer()).getData();
         Mat sourceMat = new Mat(image.getHeight(), image.getWidth(), CvType.CV_8UC3);
         sourceMat.put(0, 0, data);

         // NOTE(review): if `image` is TYPE_3BYTE_BGR the channel order is B,G,R and
         // COLOR_BGR2GRAY would be the matching conversion code - verify.
         Mat greyScaledMat = new Mat(image.getHeight(), image.getWidth(), CvType.CV_8UC1);
         Imgproc.cvtColor(sourceMat, greyScaledMat, Imgproc.COLOR_RGB2GRAY);

         // Mirror the grey Mat into a BufferedImage (grayScaledBufImg is declared outside this snippet).
         byte[] greyBytes = new byte[greyScaledMat.rows() * greyScaledMat.cols() * (int) greyScaledMat.elemSize()];
         greyScaledMat.get(0, 0, greyBytes);
         grayScaledBufImg = new BufferedImage(greyScaledMat.cols(), greyScaledMat.rows(), BufferedImage.TYPE_BYTE_GRAY);
         grayScaledBufImg.getRaster().setDataElements(0, 0, greyScaledMat.cols(), greyScaledMat.rows(), greyBytes);

         // Threshold the gray image - result in ImageOTSU.jpg.
         // The output goes to its own Mat so the grey image is not overwritten
         // (the original aliased destinationMat to greyScaledMat before thresholding).
         // With THRESH_OTSU the threshold is computed from the histogram; the value passed in is ignored.
         Mat destinationMat = new Mat();
         Imgproc.threshold(greyScaledMat, destinationMat, -1, 255, Imgproc.THRESH_BINARY_INV + Imgproc.THRESH_OTSU);
         Imgcodecs.imwrite("C:\\scme\\ImageOTSU.jpg", destinationMat);

         // Dilate the binarized image - result in myImageDilated.jpg.
         Mat dilationKernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(2, 2));
         Imgproc.dilate(destinationMat, destinationMat, dilationKernel);
         Imgcodecs.imwrite("C:\\scme\\myImageDilated.jpg", destinationMat);

         // Find contours to detect the text regions.
         // findContours in OpenCV 3.0 modifies the image it is given, so it runs on a
         // clone and destinationMat keeps the clean dilated binary image.
         Mat heirarchy = new Mat();
         List<MatOfPoint> contours = new ArrayList<MatOfPoint>();
         Imgproc.findContours(destinationMat.clone(), contours, heirarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE);
         double[] cont_area = new double[contours.size()];

         // Draw on a copy of the 3-channel source. Drawing Scalar(0,0,255) on the
         // single-channel binary Mat uses only the first component (0), i.e. black boxes.
         Mat contouredMat = sourceMat.clone();
         final double minContourArea = 50;
         final int minTextHeight = 25;

         for (int i = 0; i < contours.size(); i++) {
             MatOfPoint contour = contours.get(i);
             double area = Imgproc.contourArea(contour);
             if (area > minContourArea) {
                 Rect rect = Imgproc.boundingRect(contour);
                 cont_area[i] = area;

                 if (rect.height > minTextHeight) {
                     Imgproc.rectangle(contouredMat,
                             new Point(rect.x, rect.y),
                             new Point(rect.x + rect.width, rect.y + rect.height),
                             new Scalar(0, 0, 255));
                     // System.out.println(rect.x +"-"+ rect.y +"-"+ rect.height+"-"+rect.width);
                 }
             }
         }

         // Result in myImageContoured.jpg: the colour image with the detected regions outlined.
         Imgcodecs.imwrite("C:\\scme\\myImageContoured.jpg", contouredMat);