I think the other answer is way too complicated for this problem. Basically, you need to do the following steps:

  1. Convert your region of interest (the detection) to the HSV color space using the cvtColor function with the CV_BGR2HSV parameter.
  2. Define the minimum and maximum values for the H, S and V channels.
  3. Use these values to segment the skin pixels out of the original image, for example with the inRange function (see the sketch below).

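For just those three steps, a minimal sketch could look like this. The H/S/V bounds in the example call are rough guesses of my own, not values taken from my code further down; you will need to tune them for your camera and lighting:

#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"

using namespace cv;

// Return a binary mask (255 = skin candidate) for a BGR region of interest
Mat segment_skin(const Mat& roi_bgr, const Scalar& lower, const Scalar& upper)
{
    Mat roi_hsv, mask;
    cvtColor(roi_bgr, roi_hsv, CV_BGR2HSV);  // step 1: convert the detection to HSV
    inRange(roi_hsv, lower, upper, mask);    // steps 2 and 3: keep only pixels inside [lower, upper]
    return mask;
}

// Example call - these bounds are only a starting guess and need tuning:
// Mat mask = segment_skin(detection, Scalar(0, 48, 80), Scalar(20, 255, 255));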
The longer code snippet below should do about what you need. It contains a lot of extra functionality, but it shouldn't be hard to filter out the parts you need; I don't have time to do that right now.

// workshop_face_detect.cpp : Performing LBP CUDA face detection on live video stream
// Make it possible to segment out skin color

#include <stdlib.h>

#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/gpu/gpu.hpp"

#include <iostream>
#include <stdio.h>

using namespace std;
using namespace cv;

gpu::CascadeClassifier_GPU face_cascade;

// Basic function to calculate gradient magnitude and angle matrix based on input
vector<Mat> calculate_gradient(Mat input){
    Mat img_smooth;
    GaussianBlur( input, img_smooth, Size(11,11), 5);

    Mat grad_x = Mat(img_smooth.rows, img_smooth.cols, CV_64F);
    Mat grad_y = Mat(img_smooth.rows, img_smooth.cols, CV_64F);

    Sobel( img_smooth, grad_x, CV_64F, 1, 0, 3, 1, 0, BORDER_DEFAULT );
    Sobel( img_smooth, grad_y, CV_64F, 0, 1, 3, 1, 0, BORDER_DEFAULT );

    Mat magnitude = Mat(img_smooth.rows, img_smooth.cols, CV_64F);
    sqrt(grad_x.mul(grad_x) + grad_y.mul(grad_y), magnitude);

    Mat orientations = Mat(img_smooth.rows, img_smooth.cols, CV_64F);

    for(int i = 0; i < img_smooth.rows; i++){
        for(int j = 0; j < img_smooth.cols; j++){
            // fastAtan2 takes (y, x) and returns the angle in degrees [0,360)
            orientations.at<double>(i,j) = fastAtan2(grad_y.at<double>(i,j), grad_x.at<double>(i,j));
        }
    }

    vector<Mat> output;
    output.push_back(magnitude);
    output.push_back(orientations);

    return output;
}

// Based on polar coordinates (angle and magnitude), calculate the corresponding Cartesian coordinates (x,y)
// Specific to the OpenCV coordinate system
vector<Point> radial_to_carthesian(Point start, double angle, double magnitude){
    const double PI = CV_PI;

    // sin and cos already return signed values, so no explicit quadrant handling is needed
    double angle_rad = angle * PI / 180;
    double x_temp = cos(angle_rad) * magnitude;
    double y_temp = sin(angle_rad) * magnitude;
    double x_2 = start.x + x_temp;
    double y_2 = start.y + y_temp;

    // Create points
    vector<Point> result;
    result.push_back(start);
    result.push_back(Point(x_2, y_2));

    return result;
}

Mat visualize_gradients(vector<Mat> gradients, Mat input, int step, int magnitude){
    Mat result = Mat(input.rows, input.cols, input.type());
    input.copyTo(result);
    for(int i = 3; i < input.rows; i = i + step){
        for(int j = 3; j < input.cols; j = j + step){
            // the points (i,j) now loop through the image with points to draw
            // fastAtan2 already returned the angle in degrees [0,360), so it can be used directly
            double angle = gradients[1].at<double>(i,j);
            vector<Point> line_positions = radial_to_carthesian(Point(j,i), angle, magnitude);
            line(result, line_positions[0], line_positions[1], Scalar(255,0,0), 1);
        }
    }
    return result;
}


/** @function detect_and_process */
vector<double> detect_and_process( Mat frame, string window, vector<double> average_values, double buffer, double buffer_h, double buffer_sv )
{
    // frame holds the original input frame
    // frame_reduced is a downscaled version for processing (half width and height, 1/4th the original area)
    // frame_orig is a clone of the reduced frame that won't be processed
    Mat frame_reduced, frame_orig;
    resize(frame, frame_reduced, Size(frame.cols/2, frame.rows/2));
    frame_orig = frame_reduced.clone();

    // Read in the original frame, create a grayscale version and perform histogram equalization
    // Needed for good detection results
    Mat grayscale;
    cvtColor( frame, grayscale, CV_BGR2GRAY );
    equalizeHist( grayscale, grayscale );

    // Create face detection frame and returned faces.
    // Perform detection on this given set of data, given known model faces
    gpu::GpuMat frame_gpu(grayscale), faces;
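    // scaleFactor = 1.05 and minNeighbors = 15 are deliberately strict settings to limit false detections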
    int detections = face_cascade.detectMultiScale( frame_gpu, faces, 1.05, 15);

    // CUDA way of going through the detections that are retrieved
    // Put them into a container
    Mat obj_host;
    faces.colRange(0, detections).download(obj_host);
    Rect* facesRect = obj_host.ptr<Rect>();

    // Container to capture all face regions within the image not having to use pointers to cuda memory again
    vector<Rect> roi_faces;

    for( int i = 0; i < detections; i++ )
    {
        // Select face region
        Rect region( facesRect[i].x, facesRect[i].y, facesRect[i].width, facesRect[i].height );

        // Add a RED rectangle around the blur region
        rectangle(frame, facesRect[i], Scalar(0,0,255), 2);
        stringstream face;
        face << "Face " << (i + 1);
        putText(frame, face.str(), Point(facesRect[i].x, facesRect[i].y - 15), 1, 2, Scalar(0, 0, 255), 1 );

        // Add to the vector faces storage for further use
        roi_faces.push_back(region);
    }

    // Add another processing step
    // Combine all processing into one frame
    Mat output = Mat::zeros(frame.rows, frame.cols*2, frame.type());
    Rect roi(0, 0, frame.cols, frame.rows);
    frame.copyTo( output(roi) );

    // Create HSV color space and segment out only the H space
    // Display this in two subwindows
    Mat frame_hsv;
    vector<Mat> hsv_channels;
    cvtColor(frame_reduced, frame_hsv, CV_BGR2HSV);
    split(frame_hsv, hsv_channels);

    //Rect roi2(frame.cols, 0, frame.cols/2, frame.rows/2);
    //frame_hsv.copyTo( output(roi2) );

    // Create skin threshold segmentation into binary mask
    // Only when there is actually a face detected
    double average_h_top = average_values[0];
    double average_h_bottom = average_values[1];

    double average_s_top = average_values[2];
    double average_s_bottom = average_values[3];

    double average_v_top = average_values[4];
    double average_v_bottom = average_values[5];        

    if( !roi_faces.empty() ){
        // Define roi for the smaller window size
        Rect region_small(roi_faces[0].x / 2, roi_faces[0].y / 2, roi_faces[0].width /2, roi_faces[0].height / 2);

        // Grab HSV values only if the face is found to calculate the average values
        Mat face_region_h = hsv_channels[0]( region_small );
        Mat face_region_s = hsv_channels[1]( region_small );
        Mat face_region_v = hsv_channels[2]( region_small );

        // Inside that face we want to define a roi that selects only the useful information
        // Leave out the top 30%, which mostly contains hair
        // Leave out 20% at each side, which contains background due to the head shape
        int cols = face_region_h.cols; double cols_d = (double)cols;
        int rows = face_region_h.rows; double rows_d = (double)rows;
        Mat hsv_region_face = face_region_h( Rect((int)(cols_d * 0.2), (int)(rows_d * 0.3), (int)(cols_d * 0.6), (int)(rows_d * 0.7)) );
        Mat hsv_region_face_s = face_region_s( Rect((int)(cols_d * 0.2), (int)(rows_d * 0.3), (int)(cols_d * 0.6), (int)(rows_d * 0.7)) );
        Mat hsv_region_face_v = face_region_v( Rect((int)(cols_d * 0.2), (int)(rows_d * 0.3), (int)(cols_d * 0.6), (int)(rows_d * 0.7)) );

        // Calculate average value of the mat
        Scalar average = mean(hsv_region_face);
        Scalar average_1 = mean(hsv_region_face_s);
        Scalar average_2 = mean(hsv_region_face_v);

        double average_h = average[0];
        double average_s = average_1[0];
        double average_v = average_2[0];

        average_h_top = average_h + buffer_h;
        average_h_bottom = average_h - buffer_h;

        average_s_top = average_s + buffer_sv;
        average_s_bottom = average_s - buffer_sv;

        average_v_top = average_v + buffer_sv;
        average_v_bottom = average_v - buffer_sv;

        // Change elements of average values
        average_values[0] = average_h_top;
        average_values[1] = average_h_bottom;

        average_values[2] = average_s_top;
        average_values[3] = average_s_bottom;

        average_values[4] = average_v_top;
        average_values[5] = average_v_bottom;
    }

    // Segment the h-frame based on that value, creating a binary mask - FOR THE H VALUE
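    // Note: each per-channel pair of thresholds combined with mul() below is equivalent to a
    // single inRange call with the corresponding lower and upper bounds; it is written out
    // explicitly here to show the mechanics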
    Mat h_threshold_h, h_threshold_low, h_threshold_high;
    threshold(hsv_channels[0], h_threshold_low, average_h_bottom, 1, THRESH_BINARY);
    threshold(hsv_channels[0], h_threshold_high, average_h_top, 1, THRESH_BINARY_INV);

    h_threshold_h = h_threshold_low.mul(h_threshold_high);

    // Segment the h-frame based on that value, creating a binary mask - FOR THE S VALUE
    Mat h_threshold_s;
    threshold(hsv_channels[1], h_threshold_low, average_s_bottom, 1, THRESH_BINARY);
    threshold(hsv_channels[1], h_threshold_high, average_s_top, 1, THRESH_BINARY_INV);

    h_threshold_s = h_threshold_low.mul(h_threshold_high);

    // Segment the h-frame based on that value, creating a binary mask - FOR THE V VALUE
    Mat h_threshold_v;
    threshold(hsv_channels[2], h_threshold_low, average_v_bottom, 1, THRESH_BINARY);
    threshold(hsv_channels[2], h_threshold_high, average_v_top, 1, THRESH_BINARY_INV);

    h_threshold_v = h_threshold_low.mul(h_threshold_high);

    // Combine for the three channels
    Mat temp = h_threshold_h.mul(h_threshold_s);
    Mat h_threshold = temp.mul(h_threshold_v);

    // Erode and dilation to remove small elements from the feed
    // Anchor (-1,-1) places the anchor at the element center
    Mat structuring_element_erosion = getStructuringElement(MORPH_RECT, Size(3,3), Point(-1,-1));
    Mat structuring_element_dilation = getStructuringElement(MORPH_RECT, Size(3,3), Point(-1,-1));

    erode(h_threshold, h_threshold, structuring_element_erosion);
    dilate(h_threshold, h_threshold, structuring_element_dilation);

    // Convert the single-channel binary mask to a 3-channel image
    // so it can be copied into the color output canvas

    Mat threshold_3channels(h_threshold.rows, h_threshold.cols, CV_8UC3);
    Mat in_b[] = { h_threshold * 255, h_threshold * 255, h_threshold * 255 };
    int from_to_b[] = { 0,0, 1,1, 2,2 };
    mixChannels( in_b, 3, &threshold_3channels, 1, from_to_b, 3 );

    //Rect roi4(frame.cols, frame.rows/2, frame.cols/2, frame.rows/2);
    Rect roi2(frame.cols, 0, frame.cols/2, frame.rows/2);
    threshold_3channels.copyTo( output(roi2) );

    // Create edge map of the input image
    Mat temp_input; cvtColor(frame_reduced, temp_input, CV_BGR2GRAY);
    gpu::GpuMat input_edges(temp_input);
    gpu::GpuMat output_edges;

    gpu::Canny(input_edges, output_edges, 50, 175); 

    Mat canny_edges(output_edges);

    Mat canny_3channels(frame_reduced.rows, frame_reduced.cols, CV_8UC3);
    Mat in2[] = { canny_edges, canny_edges, canny_edges };
    int from_to2[] = { 0,0, 1,1, 2,2 };
    mixChannels( in2, 3, &canny_3channels, 1, from_to2, 3 );

    Rect roi3(frame.cols + frame.cols/2, 0, frame.cols/2, frame.rows/2);
    canny_3channels.copyTo( output(roi3) );

    // Create image variant of color image
    Mat mask_color;
    bitwise_and(frame_reduced, threshold_3channels, mask_color);

    Rect roi4(frame.cols, frame.rows/2, frame.cols/2, frame.rows/2);
    mask_color.copyTo( output(roi4) );

    // Calculate gradient and visualize them
    cvtColor(frame_orig, frame_orig, CV_BGR2GRAY);
    frame_orig.convertTo(frame_orig, CV_64F);
    vector<Mat> gradient = calculate_gradient(frame_orig);
    normalize(gradient[0], gradient[0], 1, 0, NORM_MINMAX);
    gradient[0] = gradient[0] * 255;
    gradient[0].convertTo(gradient[0], CV_8UC1);

    Mat gradient_3channels(frame_reduced.rows, frame_reduced.cols, CV_8UC3);
    Mat in_gr[] = { gradient[0], gradient[0], gradient[0] };
    int from_to_gr[] = { 0,0, 1,1, 2,2 };
    mixChannels( in_gr, 3, &gradient_3channels, 1, from_to_gr, 3 );

    Rect roi5(frame.cols + frame.cols/2, frame.rows/2, frame.cols/2, frame.rows/2);
    gradient_3channels.copyTo( output(roi5) );

    stringstream HSV_parameter;
    HSV_parameter << "HSV buffer set to " << buffer << " %.";
    putText(output, HSV_parameter.str(), Point(10,35), 1, 2, Scalar(0, 0, 0), 2);

    // Show what you got
    imshow( window, output );

    return average_values;
}

/** @function main */
int main( int argc, const char** argv )
{
    // Define thresholding for HSV upper and lower limits based on calculated average values
    // H    0 - 180 (OpenCV-specific hue range)
    // S&V  0 - 255
    // The program can be called as: workshop_face_detect.exe <buffer - percentage [0,100]>
    double buffer = 10;
    if( argc > 1 ){
        buffer = atof(argv[1]);
    }

    VideoCapture capture(0);

    if(!capture.isOpened()){  // check if we succeeded
        cout << "Could not open webcam input." << endl;
        return -1;
    }

    Mat frame_captured;
    string windowname = "Webcam capture with face detection and post processing - DSP Valley Seminar";

    // Create a cascade classifier object for running on GPU - central public value since it is needed in several functions
    // Load the face LBP cascade needed for detection
    if( !face_cascade.load( "C:\\OpenCV\\data\\lbpcascades\\lbpcascade_frontalface.xml" ) ){
        cout << "Could not load the LBP face cascade." << endl;
        return -1;
    }

    // Create a storage element for the average elements
    vector<double> values;
    for(int i = 0; i < 6; i++){
        values.push_back(0);
    }
    while( true )
    {
        // Retrieve a new frame from camera
        capture >> frame_captured;
        if( !(frame_captured.empty()) ){
            // calculate buffers in function of OpenCV ranges
            double buffer_h = 180 * (buffer / 100);
            double buffer_sv = 255 * (buffer / 100);

            // Apply the classifier to the frame
            vector<double> values_retrieved = detect_and_process( frame_captured, windowname, values, buffer, buffer_h, buffer_sv ); 
            // If the values where changed, update them
            values = values_retrieved;
        }else{
            cout << "Capturing bad frame, crash avoided!";
        }

        // Check pressed keys: '+' (ASCII 43) and '-' (ASCII 45) adjust the buffer, ESC (ASCII 27) quits
        int key = waitKey(25);

        if( key == 43 ) { 
            if ( buffer < 100 ){
                buffer = buffer + 5;
            }
        }
        if( key == 45 ) { 
            if ( buffer > 0 ){
                buffer = buffer - 5;
            }
        }

        if(key == 27 ) { break;}
    }

    // Destroy the window just to be sure
    destroyWindow( windowname );

    return 0;
}