The imgproc module has a histogram comparison function that supports several methods. I have used the first one, HISTCMP_CORREL, which calculates the correlation of two histograms, as can be seen in the documentation (link).
I have tried to replicate that formula with NumPy to calculate the correlation, but the result is far from being equal. In fact, I tried with the formula, with NumPy's built-in function, and also by looking at the source code of OpenCV and trying to replicate it (link, from line 1923).
import cv2
import numpy as np
def OpenCV_compareHist(H1, H2):
    """Print and return OpenCV's correlation score for two histograms.

    Args:
        H1: First histogram, passed straight to ``cv2.compareHist``.
        H2: Second histogram, passed straight to ``cv2.compareHist``.

    Returns:
        The value returned by ``cv2.compareHist`` with the
        ``cv2.HISTCMP_CORREL`` method.
    """
    score = cv2.compareHist(H1, H2, cv2.HISTCMP_CORREL)
    # A single pre-formatted string prints identically under Python 2 and 3
    # (the original ``print a, b`` statement is a SyntaxError on Python 3).
    print('compareHist - OpenCV:  %s' % score)
    return score
def My_compareHist(H1, H2):
    """Reproduce the HISTCMP_CORREL score with NumPy in three ways.

    The three variants are printed in this order and also returned:

    1. the textbook correlation formula,
    2. ``np.corrcoef``,
    3. a replica of the running-sums computation used by OpenCV's C++ code.

    Args:
        H1: First histogram (any shape; it is flattened).
        H2: Second histogram with the same number of bins as ``H1``.

    Returns:
        Tuple ``(formula, numpy_corr, cpp)`` with the three scores.

    Raises:
        ValueError: If the histograms are empty or differ in bin count.
    """
    # cv2.calcHist produces float32 histograms, whereas OpenCV accumulates the
    # sums in double precision. Summing millions of float32 bins (and their
    # products) in float32 loses enough precision to visibly change the
    # result, so promote to float64 before doing any arithmetic.
    h1 = np.asarray(H1, dtype=np.float64).ravel()
    h2 = np.asarray(H2, dtype=np.float64).ravel()
    if h1.size == 0 or h1.size != h2.size:
        raise ValueError(
            'histograms must be non-empty and have the same number of bins '
            '(got %d and %d)' % (h1.size, h2.size)
        )

    # 1) Textbook formula: covariance over the product of the deviations' norms.
    d1 = h1 - h1.mean()
    d2 = h2 - h2.mean()
    formula = np.sum(d1 * d2) / np.sqrt(np.sum(d1 ** 2) * np.sum(d2 ** 2))
    print('compareHist - Formula:  %s' % formula)

    # 2) NumPy built-in.
    numpy_corr = np.corrcoef(h1, h2)[0, 1]
    print('compareHist - NumPy:  %s' % numpy_corr)

    # 3) Replica of the C++ implementation based on running sums.
    s1 = h1.sum()
    s2 = h2.sum()
    s11 = np.dot(h1, h1)
    s22 = np.dot(h2, h2)
    s12 = np.dot(h1, h2)
    # Use the real bin count instead of a hard-coded 256**3 so the replica is
    # correct for histograms of any size.
    scale = 1.0 / h1.size
    num = s12 - s1 * s2 * scale
    denom2 = (s11 - s1 * s1 * scale) * (s22 - s2 * s2 * scale)
    # OpenCV reports 1.0 when the denominator is numerically zero (at least
    # one histogram is constant) instead of dividing by zero.
    if abs(denom2) > np.finfo(np.float64).eps:
        cpp = num / np.sqrt(denom2)
    else:
        cpp = 1.0
    print('compareHist - CPP:  %s' % cpp)

    return formula, numpy_corr, cpp
if __name__ == "__main__":
    # Load both images unchanged (flag -1) and compare their joint 3-channel
    # histograms, first with OpenCV and then with the NumPy re-implementations.
    img1 = cv2.imread('img_1.jpg', -1)
    img2 = cv2.imread('img_2.jpg', -1)
    # cv2.imread signals failure by returning None rather than raising; fail
    # here with a clear message instead of an opaque error inside calcHist.
    for path, img in (('img_1.jpg', img1), ('img_2.jpg', img2)):
        if img is None:
            raise IOError('could not read image: %s' % path)

    # One bin per intensity value on each of the three channels.
    H1 = cv2.calcHist([img1], channels=[0, 1, 2], mask=None,
                      histSize=[256, 256, 256],
                      ranges=[0, 256, 0, 256, 0, 256])
    H2 = cv2.calcHist([img2], channels=[0, 1, 2], mask=None,
                      histSize=[256, 256, 256],
                      ranges=[0, 256, 0, 256, 0, 256])

    OpenCV_compareHist(H1, H2)
    My_compareHist(H1, H2)