I am starting with the following image
and I am identifying the horizontal lines in it in order to find the form fields. I am using the following code:
Mat src=imread(filename);
if(!src.data) cerr<<"problem loading image"<<endl; Mat rsz; Size size(800,900); resize(src, rsz, size); Mat gray; if(rsz.channels()==3){ cvtColor(rsz,gray,CV_BGR2GRAY); }else{ gray=rsz; } Mat bw; adaptiveThreshold(~gray, bw, 255,CV_ADAPTIVE_THRESH_MEAN_C,THRESH_BINARY, 15,-2); Mat horizontal = bw.clone(); Mat vertical =bw.clone(); int scale = 25; int horizontalsize = horizontal.cols/scale; Mat horizontalStructure = getStructuringElement(MORPH_RECT,Size(horizontalsize,1)); erode(horizontal, horizontal, horizontalStructure, Point(-1, -1)); dilate(horizontal, horizontal, horizontalStructure, Point(-1, -1)); vector<Vec4i> hierarchy; vector<vector<Point> > contours; findContours(horizontal, contours, hierarchy, CV_RETR_EXTERNAL,CV_CHAIN_APPROX_SIMPLE, Point(0, 0)); vector<vector<Point> > contours_poly( contours.size() ); vector<Rect> boundRect( contours.size() ); vector<Mat> rois; for (size_t i = 0; i < contours.size(); i++) { approxPolyDP( Mat(contours[i]), contours_poly[i], 3,true ); boundRect[i] = boundingRect( Mat(contours_poly[i]) ); rois.push_back(rsz(boundRect[i]).clone()); rectangle( rsz, boundRect[i].tl(), boundRect[i].br(),Scalar(0, 255, 0), 1, 8, 0 ); }
which gives me
This seems to work almost perfectly, except that it also finds the line in the www.labor.ny.gov link in the top right. I don't want this line to be found, so I need to filter the bounding rectangles based on whether they have empty space above them (the link obviously would not pass this filter). Does anyone know how I can implement this filter? As you can see from the code, I have the bounding rectangle objects, which contain their positions, so I think I just need to check whether some small bounding rectangle above each one is empty.