extract text from image using opencv with Java

asked 2020-07-13 01:22:08 -0500

Nagarjuna gravatar image

updated 2020-07-13 02:25:32 -0500

I'm trying to use OpenCV with Java, but I am not able to extract the text from the image correctly: the recognized text contains invalid characters. Could you please help me with this?

image: C:\fakepath\PASSPORT-crop.jpg image 1: C:\fakepath\ppassport1-crop.png

Code: { System.load("C:/DIGITAL_HOME/ocr/opencv_java420.dll");
    // Entry-point fragment (enclosing method header is not shown in the post).
    // The line above loads the OpenCV 4.2.0 native library from an absolute path;
    // the rest runs the pipeline: resize -> grayscale -> crop -> OCR.

    String storeFile="D:/OCR Images/black/PP11.jpg";//ppassport1.png PASSPORT.jpg
    // Splits the hard-coded file name on a literal dot: [0] = "PP11", [1] = "jpg".
    // NOTE(review): the name is repeated here rather than derived from storeFile,
    // so both must be changed together when switching input images.
    String[] splitResFile = "PP11.jpg".split("\\.");
    // Intermediate output path: "D:/OCR Images/crop/PP11-ocr.jpg".
    String ocrFile = "D:/OCR Images/crop/"+splitResFile[0]+"-ocr."+splitResFile[1];
    // Step 1: resize storeFile (the "MRZ" branch uses 600x839) and write it to ocrFile.
    adjustPixels(storeFile, splitResFile, ocrFile,"MRZ","");
    // Step 2: convert the resized image to grayscale; returns the gray file's path
    // (null if the conversion failed before the path was assigned).
    String grayFile = convertGrayImage(splitResFile, ocrFile);
    // Step 3: crop the region of interest out of the gray image; returns the crop's path.
    String cropImage  = cropImages(splitResFile, grayFile,"MRZ","N");

    // Step 4: load the cropped image for OCR. ImageIO.read may throw IOException,
    // which is not handled inside this fragment (it sits outside the try below).
    BufferedImage bufferedImage1 = ImageIO.read(new File(cropImage));
    try {
        // `tesseract` is declared outside this fragment — presumably a Tess4J
        // Tesseract instance; confirm.
        tesseract.setLanguage("eng");
              // NOTE(review): level 2 is presumably Tess4J's text-line page-iterator
              // level (RIL_TEXTLINE), i.e. one Word per recognized line — confirm.
              List<Word> words = tesseract.getWords(bufferedImage1, 2);

              // Store and print every non-null recognized text under the key "lineno:<index>".
              // `docText` is declared outside this fragment.
              for (int i = 0; i < words.size(); i++) {
                Word word = words.get(i);
                if(null != word.getText()){
                    docText.put("lineno:"+i, word.getText());
                    System.out.println("lineno:"+i+"----"+ word.getText());
                }
              }  

         } catch (Exception e) {
             // Best-effort: any OCR failure is only printed, not propagated.
             e.printStackTrace();
    }

}

/**
 * Resizes the image stored at {@code storeFile} and writes the result to {@code ocrFile}.
 *
 * <p>Documents of type {@code "MRZ"} (case-insensitive) are scaled to 600x839 pixels,
 * everything else to 1024x600. The output format is taken from {@code splitResFile[1]}.
 * Failures are reported on stderr and otherwise swallowed (best-effort, as before), but
 * two failure modes that used to pass silently are now reported explicitly: an input
 * that ImageIO cannot decode, and an output format for which no writer accepts the image.
 *
 * @param storeFile    path of the source image
 * @param splitResFile file name parts; index 1 is used as the ImageIO format name
 * @param ocrFile      path the resized image is written to
 * @param documentType document kind; {@code "MRZ"} selects the 600x839 target size
 * @param string2      unused; kept for interface compatibility
 */
private static void adjustPixels(String storeFile, String[] splitResFile, String ocrFile, String documentType, String string2) {
    try {
        BufferedImage originalImage = ImageIO.read(new File(storeFile));
        if (originalImage == null) {
            // ImageIO.read returns null (no exception) when no registered reader can decode the file.
            throw new IOException("Unsupported or unreadable image: " + storeFile);
        }

        // TYPE_CUSTOM (0) cannot be used to construct a BufferedImage, so fall back to ARGB.
        int type = originalImage.getType() == BufferedImage.TYPE_CUSTOM
                ? BufferedImage.TYPE_INT_ARGB
                : originalImage.getType();

        BufferedImage resizeImageHintJpg;
        if ("MRZ".equalsIgnoreCase(documentType)) {
            resizeImageHintJpg = resizeImageWithHint(originalImage, type, 600, 839);
        } else {
            resizeImageHintJpg = resizeImageWithHint(originalImage, type, 1024, 600);
        }
        if (resizeImageHintJpg == null) {
            throw new IOException("Resizing failed for: " + storeFile);
        }

        // ImageIO.write returns false (and writes nothing) when no writer supports the
        // image for the requested format, e.g. an image with alpha written as "jpg".
        if (!ImageIO.write(resizeImageHintJpg, splitResFile[1], new File(ocrFile))) {
            throw new IOException("No ImageIO writer for format '" + splitResFile[1]
                    + "' accepted the resized image; nothing written to " + ocrFile);
        }
    } catch (Exception e) {
        // Best-effort step: report and continue, as the original did.
        e.printStackTrace();
    }
}

/**
 * Scales {@code originalImage} to exactly {@code x} by {@code y} pixels using bilinear
 * interpolation.
 *
 * <p>The composite and rendering hints are configured <em>before</em> the image is drawn
 * and the graphics context is disposed only afterwards. Previously the context was
 * disposed right after drawing and the hints were set on the disposed context, so they
 * never influenced the result and scaling used the default interpolation.
 *
 * @param originalImage image to scale
 * @param type          {@link BufferedImage} type constant for the result
 * @param x             target width in pixels
 * @param y             target height in pixels
 * @return the scaled image, or {@code null} if the target image could not be created
 *         (the failure is printed to stderr)
 */
private static BufferedImage resizeImageWithHint(BufferedImage originalImage, int type, int x, int y) {
    BufferedImage resizedImage = null;
    try {
        resizedImage = new BufferedImage(x, y, type);
        Graphics2D g = resizedImage.createGraphics();
        try {
            // Hints only affect operations performed after they are set.
            g.setComposite(AlphaComposite.Src);
            g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BILINEAR);
            g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
            g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            g.drawImage(originalImage, 0, 0, x, y, null);
        } finally {
            // Always release the graphics context, and only once drawing is finished.
            g.dispose();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return resizedImage;
}

/**
 * Converts the image at {@code ocrFile} to 8-bit grayscale with OpenCV and stores it as
 * {@code "D:/OCR Images/crop/<splitResFile[0]>-gray.png"}.
 *
 * <p>Fixes over the previous version:
 * <ul>
 *   <li>The input is normalised to {@code TYPE_3BYTE_BGR} first, so images with an alpha
 *       channel or int-packed pixels no longer fail the {@code DataBufferByte} cast or
 *       feed a mis-sized buffer into a 3-channel {@code Mat}.</li>
 *   <li>{@code TYPE_3BYTE_BGR} stores bytes in B,G,R order, so the conversion uses
 *       {@code COLOR_BGR2GRAY}; {@code COLOR_RGB2GRAY} swapped the red and blue weights.</li>
 *   <li>The file is named {@code *.png} and is now actually encoded as PNG (lossless)
 *       instead of using the source extension as the format.</li>
 *   <li>Native {@code Mat} memory is released, and unreadable input or a failed write is
 *       reported instead of passing silently.</li>
 * </ul>
 *
 * @param splitResFile file name parts; index 0 is the base name of the output file
 * @param ocrFile      path of the image to convert
 * @return path of the written grayscale file, or {@code null} if the conversion failed
 *         (the failure is printed to stderr)
 */
private static String convertGrayImage(String[] splitResFile, String ocrFile) {
    String grayFile = null;
    Mat mat = null;
    Mat mat1 = null;
    try {
        BufferedImage bufferImage = ImageIO.read(new File(ocrFile));
        if (bufferImage == null) {
            // ImageIO.read returns null when no registered reader can decode the file.
            throw new IOException("Unsupported or unreadable image: " + ocrFile);
        }

        // Guarantee a byte-backed, 3-channel BGR raster so it matches CV_8UC3 exactly.
        if (bufferImage.getType() != BufferedImage.TYPE_3BYTE_BGR) {
            BufferedImage bgr = new BufferedImage(
                    bufferImage.getWidth(), bufferImage.getHeight(), BufferedImage.TYPE_3BYTE_BGR);
            Graphics2D g = bgr.createGraphics();
            try {
                g.drawImage(bufferImage, 0, 0, null);
            } finally {
                g.dispose();
            }
            bufferImage = bgr;
        }

        byte[] data = ((DataBufferByte) bufferImage.getRaster().getDataBuffer()).getData();
        mat = new Mat(bufferImage.getHeight(), bufferImage.getWidth(), CvType.CV_8UC3);
        mat.put(0, 0, data);

        mat1 = new Mat(bufferImage.getHeight(), bufferImage.getWidth(), CvType.CV_8UC1);
        Imgproc.cvtColor(mat, mat1, Imgproc.COLOR_BGR2GRAY);

        // Copy the single-channel result back into a Java grayscale image.
        byte[] data1 = new byte[mat1.rows() * mat1.cols() * (int) (mat1.elemSize())];
        mat1.get(0, 0, data1);
        BufferedImage image1 = new BufferedImage(mat1.cols(), mat1.rows(), BufferedImage.TYPE_BYTE_GRAY);
        image1.getRaster().setDataElements(0, 0, mat1.cols(), mat1.rows(), data1);

        String target = "D:/OCR Images/crop/" + splitResFile[0] + "-gray.png";
        if (!ImageIO.write(image1, "png", new File(target))) {
            throw new IOException("No ImageIO writer available for PNG; nothing written to " + target);
        }
        // Only report the path once the file really exists.
        grayFile = target;
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Mats hold native memory that the Java garbage collector does not track.
        if (mat != null) {
            mat.release();
        }
        if (mat1 != null) {
            mat1.release();
        }
    }
    return grayFile;
}

/**
 * Crops the region of interest out of the grayscale image and stores it as
 * {@code "D:/OCR Images/crop/<splitResFile[0]>-crop.<splitResFile[1]>"}.
 *
 * <p>Only {@code "MRZ"} documents (case-insensitive) are cropped. When no crop is
 * produced or it cannot be written, the method returns {@code grayFile}, so the caller
 * always gets the path of an image that was actually produced. Previously the fallback
 * assignment was overwritten immediately: non-MRZ documents triggered an exception in
 * {@code ImageIO.write(null, ...)} and the path of a never-written file was returned.
 *
 * @param splitResFile       file name parts; index 0 is the base name, index 1 the format
 * @param grayFile           path of the grayscale input image
 * @param documentType       document kind; only {@code "MRZ"} is cropped
 * @param multiLineFirstName unused; kept for interface compatibility
 * @return path of the cropped image, or {@code grayFile} if no crop was written
 */
private static String cropImages(String[] splitResFile, String grayFile, String documentType, String multiLineFirstName) {
    // Default to the uncropped image; replaced only after a crop was written successfully.
    String cropImage = grayFile;
    // Always null here; passed through unchanged because getBufferImage's contract is
    // defined elsewhere. NOTE(review): confirm that helper really expects null.
    BufferedImage bufferedImage = null;

    try {
        BufferedImage image = null;
        if ("MRZ".equalsIgnoreCase(documentType)) {
            // NOTE(review): the arguments are presumably x, y, width, height of the strip
            // at the bottom of the 600x839 resized page — confirm against getBufferImage.
            image = getBufferImage(bufferedImage, grayFile, 25, 750, 540, 70);
        }

        if (image != null) {
            String target = "D:/OCR Images/crop/" + splitResFile[0] + "-crop." + splitResFile[1];
            // ImageIO.write returns false (and writes nothing) if no writer fits the format.
            if (ImageIO.write(image, splitResFile[1], new File(target))) {
                cropImage = target;
            } else {
                System.err.println("No ImageIO writer for format '" + splitResFile[1]
                        + "'; falling back to " + grayFile);
            }
        }
    } catch (Exception e) {
        // Best-effort step: report and fall back to the gray image.
        e.printStackTrace();
    }

    return cropImage;
}

Output:

lineno:0----P<INDBOLLIVARAM<<NAGARJUNA<<<<<<<<<<<<<<<<<<

lineno:1----T8122345<41ND8608147142909185<<<<<<<<<<<<<<<x

another output:

lineno:0----P<GIRI<<IANlSH<<<<(<<<<<<<<<<<<<<<<<<<<<<<<<l

lineno:1----K8016274<21ND97D1170M807277<<<<<<<<<<<<<<<z
edit retag flag offensive close merge delete

Comments

i do not see your program using opencv, and we probably cannot help with tesseract

berak gravatar imageberak ( 2020-07-13 01:46:37 -0500 )edit

Please check the description; I have just edited it.

Nagarjuna gravatar imageNagarjuna ( 2020-07-13 02:23:38 -0500 )edit