Revision history [back]

Text and tables are easy to remove using the dilate() function. After that there are several possible approaches; this is what I did and what I got:

// Locate picture regions on a page image with a white background: thin dark
// strokes (text, table lines) are erased by dilation, the remaining non-white
// blobs are approximated by polygons, and the filled polygons form a mask that
// is applied to the original image.
Mat image = imread("page.png");
CV_Assert(!image.empty());                          //imread yields an empty Mat when the file cannot be read
Mat gray;
cvtColor(image, gray, COLOR_BGR2GRAY);

Mat kernel = getStructuringElement(MORPH_RECT, Size(8,8));
dilate(gray, gray, kernel);                         //dilate to remove text and tables

//pure white (255) background becomes 0, every other pixel becomes 255;
//for 8-bit data this single pass equals THRESH_TOZERO at 254 followed by THRESH_BINARY_INV at 0
threshold(gray, gray, 254, 255, THRESH_BINARY_INV);

//try to fill images rectangles and remove noise
morphologyEx(gray, gray, MORPH_CLOSE, kernel);
morphologyEx(gray, gray, MORPH_OPEN, kernel);

//tunable parameters for the contour stage
const double minContourArea = 2000;                 //contours with a smaller area are treated as noise
const double approxEpsilon = 50;                    //approxPolyDP accuracy, in pixels

//find contours and approximate to squares
vector<vector<Point>> contours;
findContours(gray, contours, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE);
vector<vector<Point>> squares(contours.size());     //squares[j] stays empty for contours that are filtered out
Mat mask(gray.rows, gray.cols, CV_8UC1, Scalar(0));
for (size_t j = 0; j < contours.size(); j++){
    if (contourArea(contours[j]) > minContourArea){ //optionally filter noise (too small contours)
        approxPolyDP(contours[j], squares[j], approxEpsilon, true);
        //negative thickness fills the polygon interior
        drawContours(mask, squares, static_cast<int>(j), Scalar(255), -1);
    }
}

//copy only the masked pixels of the original image; the rest stays black
Mat final;
image.copyTo(final, mask);
imshow("Mask", mask);
imshow("Result", final);
waitKey();

So these are the results for the mask and the final matrices: image description

image description

You can then extract each individual image based on the found squares. You will probably also need to tune the parameters (kernel size, minimum contour area, approximation epsilon) to get the best results across your complete set of input images.