Revision history [back]

Detecting articles from a newspaper using opencv?

I tried this using dilation; the code I used is below:

#include "stdafx.h"

#include <fstream>
#include <iostream>
#include "opencv2/opencv.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"

using namespace cv;
using namespace std;


int main(int argc, char* argv[]) {

Mat matImage = imread("Images/newspaper2.jpeg");

if (!matImage.data) {
    cout << "Unable to open the file\n" << endl;
    return 1;
}

int iterations=5;
Point anchor;
Mat grayImage;
Mat threshImage;
Mat dilatedImage;
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;


cvtColor(matImage, grayImage, COLOR_RGB2GRAY);
threshold(grayImage, threshImage, 150, 255, THRESH_BINARY_INV);
Mat element = getStructuringElement(MORPH_CROSS,Size(3,3));
dilate(threshImage, dilatedImage, element, anchor = Point(-1, -1), iterations);
findContours(dilatedImage, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_NONE, Point(0, 0));

vector<Rect> boundRect(contours.size());

for(int contour = 0; contour < contours.size(); contour++){
    boundRect[contour] = boundingRect(contours[contour]);
    rectangle(dilatedImage, boundRect[contour].tl(), boundRect[contour].br(), (255,0,255), 2);
}

imshow("Contours",dilatedImage);
imwrite("Output.png", dilatedImage);
waitKey(0);
return 0;
 }

and the output I got was not satisfying. This is my input image: Input image. But my output is this: Output image

But I didn't want this. I then saw another approach in the research paper "Logical segmentation for article extraction in newspapers", in which they used a CRF model and labeled the headings, paragraphs and text lines. You can see their approach in the following image: Research paper process

But my problem is that I don't know where to start with this approach, or how to use a CRF. Can anyone please help me find a way, or suggest some other approach?