1 | initial version |
I think the other answer is way too complicated for this problem. Basically, you will need to do the following steps:
This code snippet should do roughly what you need. It contains much more functionality than required, but it shouldn't be hard to filter out the parts you need; I don't have time to do that myself right now.
// workshop_face_detect.cpp : Performing LBP CUDA face detection on live video stream
// Make it possible to segment out skin color
#include <opencv/cv.h>
#include <opencv/cvaux.h>
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/gpu/gpu.hpp"
#include <iostream>
#include <stdio.h>
using namespace std;
using namespace cv;
// GPU cascade classifier used for face detection.
// Kept at file scope because main() loads the model into it and
// detect_and_process() runs the detection with it.
gpu::CascadeClassifier_GPU face_cascade;
// Compute the per-pixel gradient magnitude and orientation of an image.
//
// input   : image to analyse. It is expected to be single-channel, because the
//           orientation loop below reads exactly one double per pixel.
// returns : vector with two CV_64F matrices of the same size as the input:
//             [0] gradient magnitude, sqrt(gx^2 + gy^2)
//             [1] gradient orientation as returned by fastAtan2, i.e. an angle
//                 in degrees in the range [0, 360)
vector<Mat> calculate_gradient(Mat input){
    // Smooth first so that the derivative is not dominated by pixel noise
    Mat img_smooth;
    GaussianBlur( input, img_smooth, Size(11,11), 5);

    // First order derivatives in x and y direction; Sobel allocates the outputs
    Mat grad_x, grad_y;
    Sobel( img_smooth, grad_x, CV_64F, 1, 0, 3, 1, 0, BORDER_DEFAULT );
    Sobel( img_smooth, grad_y, CV_64F, 0, 1, 3, 1, 0, BORDER_DEFAULT );

    Mat magnitude;
    sqrt(grad_x.mul(grad_x) + grad_y.mul(grad_y), magnitude);

    Mat orientations(img_smooth.rows, img_smooth.cols, CV_64F);
    for(int row = 0; row < orientations.rows; row++){
        const double* gx = grad_x.ptr<double>(row);
        const double* gy = grad_y.ptr<double>(row);
        double* angle = orientations.ptr<double>(row);
        for(int col = 0; col < orientations.cols; col++){
            // fastAtan2 expects (y, x): the vertical component comes first
            angle[col] = fastAtan2( static_cast<float>(gy[col]), static_cast<float>(gx[col]) );
        }
    }

    vector<Mat> output;
    output.push_back(magnitude);
    output.push_back(orientations);
    return output;
}
// Convert a polar offset (angle and length) relative to a start point into a
// line segment expressed in Cartesian pixel coordinates.
//
// start     : origin of the segment
// angle     : direction of the segment in degrees
// magnitude : length of the segment in pixels
// returns   : two points, { start, end }, where
//             end = start + magnitude * (cos(angle), sin(angle)),
//             rounded to the nearest pixel
//
// cos and sin already carry the correct sign for every quadrant, so no
// quadrant specific handling is needed.
vector<Point> radial_to_carthesian(Point start, double angle, double magnitude){
    const double angle_rad = angle * CV_PI / 180.0;

    // Round instead of truncating so the end point is not biased towards zero
    const Point end( cvRound(start.x + cos(angle_rad) * magnitude),
                     cvRound(start.y + sin(angle_rad) * magnitude) );

    vector<Point> result;
    result.reserve(2);
    result.push_back(start);
    result.push_back(end);
    return result;
}
// Draw the gradient orientation as short line segments on a copy of an image.
//
// gradients : gradient data; only element [1] is read and it is accessed as a
//             CV_64F orientation matrix. Each value is multiplied by 360, so it
//             is expected to be normalised to the [0,1] range.
//             NOTE(review): calculate_gradient() stores the fastAtan2 result,
//             which is already in degrees - confirm that callers divide by 360
//             before passing the data in.
// input     : image to draw on; it is not modified, a copy is returned
// step      : distance in pixels between two sampled positions, in both directions
// magnitude : length in pixels of every drawn segment
// returns   : copy of input with the segments drawn in Scalar(255,0,0).
//             When step is not positive or no orientation matrix is supplied,
//             the plain copy is returned.
Mat visualize_gradients(vector<Mat> gradients, Mat input, int step, int magnitude){
    Mat result = input.clone();

    // A non positive step would never advance the loops below
    if( step <= 0 || gradients.size() < 2 ){
        return result;
    }

    // Never sample outside of the orientation matrix
    const Mat& orientations = gradients[1];
    const int row_limit = (orientations.rows < input.rows) ? orientations.rows : input.rows;
    const int col_limit = (orientations.cols < input.cols) ? orientations.cols : input.cols;

    for(int row = 3; row < row_limit; row += step){
        for(int col = 3; col < col_limit; col += step){
            // Scale the normalised orientation back to degrees
            const double angle = orientations.at<double>(row, col) * 360;
            // Point takes (x, y), which corresponds to (column, row)
            const vector<Point> line_positions = radial_to_carthesian(Point(col, row), angle, magnitude);
            line(result, line_positions[0], line_positions[1], Scalar(255,0,0), 1);
        }
    }
    return result;
}
/** @function detectAndDisplay */
vector<double> detect_and_process( Mat frame, string window, vector<double> average_values, double buffer, double buffer_h, double buffer_sv )
{
// Frame has the original input frame
// Frame_reduced has a reduced ratio for processing, so that results can be displayed on smaller versions (1/4th original)
// Frame_orig has the reduced original input and won't be processed
Mat frame_reduced, frame_orig;
resize(frame, frame_reduced, Size(frame.cols/2, frame.rows/2));
frame_orig = frame_reduced.clone();
// Read in original frame, create grayscale and perform histogram equilization
// Needed for good detection results
Mat grayscale;
cvtColor( frame, grayscale, CV_BGR2GRAY );
equalizeHist( grayscale, grayscale );
// Create face detection frame and returned faces.
// Perform detection on this given set of data, given known model faces
gpu::GpuMat frame_gpu(grayscale), faces;
int detections = face_cascade.detectMultiScale( frame_gpu, faces, 1.05, 15);
// CUDA way of going through the detections that are retrieved
// Put them into a container
Mat obj_host;
faces.colRange(0, detections).download(obj_host);
Rect* facesRect = obj_host.ptr<Rect>();
// Container to capture all face regions within the image not having to use pointers to cuda memory again
vector<Rect> roi_faces;
for( int i = 0; i < detections; i++ )
{
// Select face region
Rect region( facesRect[i].x, facesRect[i].y, facesRect[i].width, facesRect[i].height );
// Add a RED rectangle around the blur region
rectangle(frame, facesRect[i], Scalar(0,0,255), 2);
stringstream face;
face << "Face " << (i + 1);
putText(frame, face.str(), Point(facesRect[i].x, facesRect[i].y - 15), 1, 2, Scalar(0, 0, 255), 1 );
// Add to the vector faces storage for further use
roi_faces.push_back(region);
}
// Add another processing step
// Combine all processing into one frame
Mat output = Mat::zeros(frame.rows, frame.cols*2, frame.type());
Rect roi(0, 0, frame.cols, frame.rows);
frame.copyTo( output(roi) );
// Create HSV color space and segment out only the H space
// Display this in two subwindows
Mat frame_hsv;
vector<Mat> hsv_channels;
cvtColor(frame_reduced, frame_hsv, CV_BGR2HSV);
split(frame_hsv, hsv_channels);
//Rect roi2(frame.cols, 0, frame.cols/2, frame.rows/2);
//frame_hsv.copyTo( output(roi2) );
// Create skin threshold segmentation into binary mask
// Only when there is actually a face detected
double average_h_top = average_values[0];
double average_h_bottom = average_values[1];
double average_s_top = average_values[2];
double average_s_bottom = average_values[3];
double average_v_top = average_values[4];
double average_v_bottom = average_values[5];
if( !(roi_faces.size() == 0) ){
// Define roi for the smaller window size
Rect region_small(roi_faces[0].x / 2, roi_faces[0].y / 2, roi_faces[0].width /2, roi_faces[0].height / 2);
// Grab HSV values only if the face is found to calculate the average values
Mat face_region_h = hsv_channels[0]( region_small );
Mat face_region_s = hsv_channels[1]( region_small );
Mat face_region_v = hsv_channels[2]( region_small );
// Inside that face we want to define a roi that selects the usefull information
// Leave out 30% of top, containing largest portion of hair.
// Leave out 10% of space at each side, containing background information due to head shape
int cols = face_region_h.cols; double cols_d = (double)cols;
int rows = face_region_h.rows; double rows_d = (double)rows;
Mat hsv_region_face = face_region_h( Rect((int)(cols_d * 0.2), (int)(rows_d * 0.3), (int)(cols_d * 0.6), (int)(rows_d * 0.7)) );
Mat hsv_region_face_s = face_region_s( Rect((int)(cols_d * 0.2), (int)(rows_d * 0.3), (int)(cols_d * 0.6), (int)(rows_d * 0.7)) );
Mat hsv_region_face_v = face_region_v( Rect((int)(cols_d * 0.2), (int)(rows_d * 0.3), (int)(cols_d * 0.6), (int)(rows_d * 0.7)) );
// Calculate average value of the mat
Scalar average = mean(hsv_region_face);
Scalar average_1 = mean(hsv_region_face_s);
Scalar average_2 = mean(hsv_region_face_v);
double average_h = average[0];
double average_s = average_1[0];
double average_v = average_2[0];
average_h_top = average_h + buffer_h;
average_h_bottom = average_h - buffer_h;
average_s_top = average_s + buffer_sv;
average_s_bottom = average_s - buffer_sv;
average_v_top = average_v + buffer_sv;
average_v_bottom = average_v - buffer_sv;
// Change elements of average values
average_values[0] = average_h_top;
average_values[1] = average_h_bottom;
average_values[2] = average_s_top;
average_values[3] = average_s_bottom;
average_values[4] = average_v_top;
average_values[5] = average_v_bottom;
}
// Segment the h-frame based on that value, creating a binary mask - FOR THE H VALUE
Mat h_threshold_h, h_threshold_low, h_threshold_high;
double out1 = threshold(hsv_channels[0], h_threshold_low, average_h_bottom, 1, THRESH_BINARY);
double out2 = threshold(hsv_channels[0], h_threshold_high, average_h_top, 1, THRESH_BINARY_INV);
h_threshold_h = h_threshold_low.mul(h_threshold_high);
// Segment the h-frame based on that value, creating a binary mask - FOR THE S VALUE
Mat h_threshold_s;
double out1_b = threshold(hsv_channels[1], h_threshold_low, average_s_bottom, 1, THRESH_BINARY);
double out2_b = threshold(hsv_channels[1], h_threshold_high, average_s_top, 1, THRESH_BINARY_INV);
h_threshold_s = h_threshold_low.mul(h_threshold_high);
// Segment the h-frame based on that value, creating a binary mask - FOR THE V VALUE
Mat h_threshold_v;
double out1_c = threshold(hsv_channels[2], h_threshold_low, average_v_bottom, 1, THRESH_BINARY);
double out2_c = threshold(hsv_channels[2], h_threshold_high, average_v_top, 1, THRESH_BINARY_INV);
h_threshold_v = h_threshold_low.mul(h_threshold_high);
// Combine for the three channels
Mat temp = h_threshold_h.mul(h_threshold_s);
Mat h_threshold = temp.mul(h_threshold_v);
// Erode and dilation to remove small elements from the feed
Mat structuring_element_erosion = getStructuringElement(MORPH_RECT, Size(3,3), Point(0,0));
Mat structuring_element_dilation = getStructuringElement(MORPH_RECT, Size(3,3), Point(0,0));
erode(h_threshold, h_threshold, structuring_element_erosion);
dilate(h_threshold, h_threshold, structuring_element_dilation);
// We could add two other masks for S and V value
// Apply the same average filtering
Mat threshold_3channels(h_threshold.rows, h_threshold.cols, CV_8UC3);
Mat in_b[] = { h_threshold * 255, h_threshold * 255, h_threshold * 255 };
int from_to_b[] = { 0,0, 1,1, 2,2 };
mixChannels( in_b, 3, &threshold_3channels, 1, from_to_b, 3 );
//Rect roi4(frame.cols, frame.rows/2, frame.cols/2, frame.rows/2);
Rect roi2(frame.cols, 0, frame.cols/2, frame.rows/2);
threshold_3channels.copyTo( output(roi2) );
// Create edge map of the input image
Mat temp_input; cvtColor(frame_reduced, temp_input, CV_BGR2GRAY);
gpu::GpuMat input_edges(temp_input);
gpu::GpuMat output_edges;
gpu::Canny(input_edges, output_edges, 50, 175);
Mat canny_edges(output_edges);
Mat canny_3channels(frame_reduced.rows, frame_reduced.cols, CV_8UC3);
Mat in2[] = { canny_edges, canny_edges, canny_edges };
int from_to2[] = { 0,0, 1,1, 2,2 };
mixChannels( in2, 3, &canny_3channels, 1, from_to2, 3 );
Rect roi3(frame.cols + frame.cols/2, 0, frame.cols/2, frame.rows/2);
canny_3channels.copyTo( output(roi3) );
// Create image variant of color image
Mat mask_color;
bitwise_and(frame_reduced, threshold_3channels, mask_color);
Rect roi4(frame.cols, frame.rows/2, frame.cols/2, frame.rows/2);
mask_color.copyTo( output(roi4) );
// Calculate gradient and visualize them
cvtColor(frame_orig, frame_orig, CV_BGR2GRAY);
frame_orig.convertTo(frame_orig, CV_64F);
vector<Mat> gradient = calculate_gradient(frame_orig);
normalize(gradient[0], gradient[0], 1, 0, CV_MINMAX);
gradient[0] = gradient[0] * 255;
gradient[0].convertTo(gradient[0], CV_8UC1);
Mat gradient_3channels(frame_reduced.rows, frame_reduced.cols, CV_8UC3);
Mat in_gr[] = { gradient[0], gradient[0], gradient[0] };
int from_to_gr[] = { 0,0, 1,1, 2,2 };
mixChannels( in_gr, 3, &gradient_3channels, 1, from_to_gr, 3 );
Rect roi5(frame.cols + frame.cols/2, frame.rows/2, frame.cols/2, frame.rows/2);
gradient_3channels.copyTo( output(roi5) );
stringstream HSV_parameter;
HSV_parameter << "HSV buffer set to " << buffer << " %.";
putText(output, HSV_parameter.str(), Point(10,35), 1, 2, Scalar(0, 0, 0), 2);
// Show what you got
imshow( window, output );
return average_values;
}
/** @function main */
int main( int argc, const char** argv )
{
// Define thresholding for HSV upper and lower limits based on calculated average values
// H 0 - 180 specific values for openCV
// S&V 0 - 255
// For now software can be called using workshop_face_detect.exe <buffer - percentages [0%,100%]>
double buffer = 10;
if( argv[1] ){
buffer = atoi(argv[1]);
}
VideoCapture capture(0);
if(!capture.isOpened()){ // check if we succeeded
cout << "Could not open webcam input." << endl;
return -1;
}
Mat frame_captured;
string windowname = "Webcam capture with face detection and post processing - DSP Valley Seminar";
// Create a cascade classifier object for running on GPU - central public value since it is needed in several functions
// Load the face LBP cascade needed for detection
face_cascade.load( "C:\\OpenCV\\data\\lbpcascades\\lbpcascade_frontalface.xml" );
// Create a storage element for the average elements
vector<double> values;
for(int i = 0; i < 6; i++){
values.push_back(0);
}
while( true )
{
// Retrieve a new frame from camera
capture >> frame_captured;
if( !(frame_captured.empty()) ){
// calculate buffers in function of OpenCV ranges
double buffer_h = 180 * (buffer / 100);
double buffer_sv = 255 * (buffer / 100);
// Apply the classifier to the frame
vector<double> values_retrieved = detect_and_process( frame_captured, windowname, values, buffer, buffer_h, buffer_sv );
// If the values where changed, update them
values = values_retrieved;
}else{
cout << "Capturing bad frame, crash avoided!";
}
// Look if someone pressed ESC character in order to close down application
// ASCI code for ESC character is 27
int key = waitKey(25);
if( key == 43 ) {
if ( buffer < 100 ){
buffer = buffer + 5;
}
}
if( key == 45 ) {
if ( buffer > 0 ){
buffer = buffer - 5;
}
}
if(key == 27 ) { break;}
}
// Destroy the namedWindow just to be sure
cvDestroyWindow( "Webcam capture with face detection" );
return 0;
}