Hello! I am trying to train a binary logistic regression classifier on hog features calculated from opencv's internal implementation of the hog descriptor. I am using OpenCV's java bindings. This is my first time trying to train a machine learning model with OpenCV. The model seems to train correctly, but when I try to test the model on some of the data I collected, I get a matrix that seems to be much larger than it should be and full of empty spaces. I would appreciate any help you could give me with this! I got most of my code from this question. Here is my Code:
String DATABASEPos = "C:\\TrainingArena\\yalefaces\\yaleB11";
String DATABASENeg = "C:\\TrainingArena\\neg";
//List initialization
ArrayList<Integer> training_labels_array = new ArrayList<>();
ArrayList<Integer> testing_labels_array = new ArrayList<>();
Mat TRAINING_DATA = new Mat();
Mat TESTING_DATA = new Mat();
// Load training and testing data
File[] directories = new File(DATABASEPos).listFiles();
HOGDescriptor hog = new HOGDescriptor(new Size(640,480),new Size(64,64),new Size(16,16), new Size(8,8),16);
for(int i = 0; i < directories.length; i++){
File[] files = directories[i].listFiles(); //get all files in each directory
int limit = 100;
System.out.print("Loading training positive data...");
for(int j = 0; j < limit; j++){
Mat image = Imgcodecs.imread(files[j].getAbsolutePath(),Imgcodecs.IMREAD_UNCHANGED); //for each file, read the image
MatOfFloat training_feature = new MatOfFloat();
Size winstride = new Size();
Size padding = new Size();
MatOfPoint locations = new MatOfPoint();
hog.compute(image,training_feature,winstride,padding,locations);
locations.release();
TRAINING_DATA.push_back(training_feature);
training_labels_array.add(1);
System.out.println(100*(((j+1)*1.0)/(limit*1.0))+"%");
image.release();
System.gc();
}
System.out.print("Loading testing positive data...");
int lim2 = limit + 100;
for(int j = limit; j < lim2; j++){
Mat image = Imgcodecs.imread(files[j].getAbsolutePath());
MatOfFloat testing_feature = new MatOfFloat();
Size winstride = new Size();
Size padding = new Size();
MatOfPoint locations = new MatOfPoint();
hog.compute(image,testing_feature,winstride,padding,locations);
locations.release();
TESTING_DATA.push_back(testing_feature);
testing_labels_array.add(1);
System.out.println(100*(((j+1-limit)*1.0)/((lim2-limit)*1.0))+"%");
image.release();
System.gc();
}
}
File[] neg = new File(DATABASENeg).listFiles();
int negLimitTraining = 100;
int negLimitTesting = 200;
System.out.print(negLimitTraining);
System.out.print("Loading training negative data...");
for(int i = 0; i < negLimitTraining; i++) {
Mat image = Imgcodecs.imread(neg[i].getAbsolutePath(),Imgcodecs.IMREAD_UNCHANGED); //for each file, read the image
MatOfFloat training_feature = new MatOfFloat();
Size winstride = new Size();
Size padding = new Size();
MatOfPoint locations = new MatOfPoint();
hog.compute(image,training_feature,winstride,padding,locations);
locations.release();
TRAINING_DATA.push_back(training_feature);
training_labels_array.add(0);
System.out.println(100*(((i+1)*1.0)/(negLimitTraining))+"%");
image.release();
System.gc();
}
System.out.print("Loading testing negative data...");
for(int i = negLimitTraining; i < negLimitTesting+negLimitTraining; i++) {
Mat image = Imgcodecs.imread(neg[i].getAbsolutePath(),Imgcodecs.IMREAD_UNCHANGED); //for each file, read the image
MatOfFloat training_feature = new MatOfFloat();
Size winstride = new Size();
Size padding = new Size();
MatOfPoint locations = new MatOfPoint();
hog.compute(image,training_feature,winstride,padding,locations);
locations.release();
TRAINING_DATA.push_back(training_feature);
training_labels_array.add(0);
System.out.println(100*(((i+1-negLimitTraining)*1.0)/(negLimitTesting-negLimitTraining))+"%"); //Yea I know this percent calculation is wrong, but its fun to see it go to 200%
image.release();
System.gc();
}
// Put training and testing labels into Mats
Mat TRAINING_LABELS = Mat.zeros(TRAINING_DATA.rows(), 1, CvType.CV_32SC1);
for(int i = 0; i < training_labels_array.size(); i++){
TRAINING_LABELS.put(i, 0, training_labels_array.get(i));
}
Mat TESTING_LABELS = Mat.zeros(TESTING_DATA.rows(), 1, CvType.CV_32SC1); for(int i = 0; i < testing_labels_array.size(); i++){
TESTING_LABELS.put(i, 0, testing_labels_array.get(i));
}
TRAINING_LABELS.convertTo(TRAINING_LABELS,CvType.CV_32F);
TESTING_LABELS.convertTo(TESTING_LABELS,CvType.CV_32F);
System.out.println("TRAINING_DATA - Rows:" + TRAINING_DATA.rows() + " Cols:" + TRAINING_DATA.cols());
System.out.println("TRAINING_LABELS - Rows:" + TRAINING_LABELS.rows() + " Cols:" + TRAINING_LABELS.cols());
System.out.println("TESTING_DATA - Rows:" + TESTING_DATA.rows() + " Cols:" + TESTING_DATA.cols());
System.out.println("TESTING_LABELS - Rows:" + TESTING_LABELS.rows() + " Cols:" + TESTING_LABELS.cols());
// Train SVM
LogisticRegression log = LogisticRegression.create();
log.setRegularization(1);
log.setTrainMethod(LogisticRegression.MINI_BATCH);
log.setMiniBatchSize(100);
log.setIterations(10000);
log.setLearningRate(0.05);
log.train(TRAINING_DATA, Ml.ROW_SAMPLE, TRAINING_LABELS);
Mat RESULTS = new Mat();
int flags = 0;
log.predict(TESTING_DATA, RESULTS, flags);
System.out.print(Core.sumElems(RESULTS));