# is my concept of infomax wrong or right?

Hi. Sorry guys, I would like to know whether my understanding of how to implement ICA infomax is wrong, because the result is a blank image. Here is my code:

/***

This is ICA infomax implementation
equation :  X = AS  where X is the mixture matrix, A is mixing matrix, S is the source

SO ------> S = WX   where W is unmixing matrix.
and since there is no way that we can recover the exact source image,
we use U instead of S, where U ~= S,
which gives us  U = WX

W = W + learning_rate * (YU) * W

where Y = -tanh(U/2)
YU = I + Y * U^T   (U^T is the transpose of U)

***/
#include <iostream>
#include <math.h>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;

// Translates an OpenCV depth code (e.g. the value of Mat::depth()) into the
// name of the corresponding C++ element type. Any code that is not one of the
// seven listed below yields "invalid type!".
const char* depthToStr(int depth) {
    if (depth == CV_8U)  return "unsigned char";
    if (depth == CV_8S)  return "char";
    if (depth == CV_16U) return "unsigned short";
    if (depth == CV_16S) return "short";
    if (depth == CV_32S) return "int";
    if (depth == CV_32F) return "float";
    if (depth == CV_64F) return "double";
    return "invalid type!";
}

// Infomax ICA over the rows of a mixture matrix.
//
// w : output; recreated as an (x.rows x x.rows) CV_64F unmixing matrix,
//     initialised with uniform random values in [-1, 1) and then refined.
// x : mixture matrix, one mixed signal per row. Taken by value and converted
//     to CV_64F locally.
// s : output; receives the estimated sources w * x.
//
// Every iteration applies
//     U = W * X
//     W <- W + lnrate * (I - tanh(U) * U^T) * W
// for a fixed number of iterations. If x is empty, w and s are released and
// the function returns without doing any work.
void ICA(Mat &w, Mat x, Mat &s)
{
    // Nothing to unmix: leave both outputs empty rather than multiplying
    // zero-sized matrices.
    if (x.empty())
    {
        w.release();
        s.release();
        return;
    }

    // the unmixing matrix is square, one row/column per row of x
    const int nmix = x.rows;

    // learning rate
    const double lnrate = 0.95;

    // number of update steps
    const int iterations = 10;

    // identity matrix used in the update rule
    const Mat I = Mat::eye(nmix, nmix, CV_64F);

    // create the unmixing matrix and fill it with random values
    w.create(nmix, nmix, CV_64F);
    randu(w, Scalar(-1), Scalar(1));

    x.convertTo(x, CV_64F);
    cout << depthToStr(x.depth()) << endl << depthToStr(w.depth()) << endl;

    for (int iter = 0; iter < iterations; iter++)
    {
        // current source estimate
        const Mat u = w * x;

        // Super-gaussian nonlinearity y = -tanh(u), written with a single
        // exponential: -tanh(u) = 2 / (exp(2u) + 1) - 1. The denominator is
        // always >= 1, which avoids the overflow-over-overflow quotient that
        // (exp(u) - exp(-u)) / (exp(u) + exp(-u)) runs into for large |u|.
        Mat e2u;
        exp(2.0 * u, e2u);
        const Mat y = 2.0 / (e2u + 1.0) - 1.0;

        const Mat yu = I + y * u.t();

        w = lnrate * yu * w + w;
    }

    s = w * x;
}
int main()
{
Mat Image, weight, result;

imshow(" s", Image);

ICA(weight, Image, result);
cout << result <<endl;
imshow(" ", result);
waitKey(0);
return 0;
}

the output wasn't as i expected

imho, you're already quite close !

the main thing missing here is: if you want to separate N sources, you need N mixture images, flattened and stacked (similar to OpenCV's PCA usage), not a single image. Also, the size of W is NxN, unrelated to the size of the images.

from there on, it's more a matter of careful normalization, whitening, and finding good initial random values (tanh easily "explodes", and produces NaN values, if the "energy" is too high)

#include <iostream>
#include <opencv2/opencv.hpp>

using namespace cv;
using namespace std;

void ICA(const Mat &mix, Mat &w, Mat &s)
{
Mat x;
mix.convertTo(x, CV_64F, 1.0/255);
// whiten input
Scalar m,d;
meanStdDev(x,m,d);
x -= m[0];
x /= d[0];

// get the size of the unmixing matrix
int nmix = x.rows;

// learning rate
float lnrate = 0.95;

Mat I = Mat::eye(nmix, nmix, CV_64F);
w.create(nmix, nmix, CV_64F);
randu(w, Scalar(-.01), Scalar(.01));

for(int iter=0; iter<5000; iter++)
{
Mat u = w * x;

Mat tanh1, tanh2, tan;
exp (u, tanh1);
exp (-1 * u , tanh2);
// super gaussian
Mat y = (tanh1 - tanh2) / (tanh1 + tanh2);
// sub gaussian
//Mat y = u - (tanh1 - tanh2) / (tanh1 + tanh2);

Mat yu = (I - y * u.t());

w = w + 0.95 * yu * w;

if (iter % 100 == 0)
cout << iter << " " << sum(yu)[0] << endl;
}

s = w * x;
}

int main()
{

Size siz(100,100);
resize(i1,i1,siz);
resize(i2,i2,siz);
resize(i3,i3,siz);

// to seperate 3 inputs, we need 3 mixes
Mat m1 = i1*.4 + i2*.3+i3*.3;
Mat m2 = i1*.3 + i2*.4+i3*.3;
Mat m3 = i1*.3 + i2*.3+i3*.4;

// visualization
Mat m;
hconcat(m1,m2,m);
hconcat(m,m3,m);
imshow("mix", m);
Mat o;
hconcat(i1,i2,o);
hconcat(o,i3,o);
imshow("orig", o);
waitKey(10);

// stack them to a single input Mat
Mat mix;
mix.push_back(m1.reshape(1,1));
mix.push_back(m2.reshape(1,1));
mix.push_back(m3.reshape(1,1));

// perform infomax
Mat w,result;
ICA(mix, w, result);

cout << "weights " << w << endl;

// visualize output
normalize(result,result,255,0,NORM_MINMAX,CV_8U);
Mat r1 = result.row(0).reshape(1,100);
Mat r2 = result.row(1).reshape(1,100);
Mat r3 = result.row(2).reshape(1,100);
Mat r;
hconcat(r1, r2, r);
hconcat(r, r3, r);
imshow("res", r);

waitKey(0);

return 0;
}

thanks, but why do i get nan values if i set the size(200,200)? doesn't it work of high img size?

1

you need smaller initial values in the W matrix, then.

also, x^3 can be used as nonlinearity, alternative to the tanh, somewhat faster and more robust against that nan problem.

i made some attempt with fastICA, too, will update tomorrow !

"doesn't it work of high img size" -- think of it, the learned w matrix is just 3x3 in this example. (vs 200x200 images) it's amazing to me, that it can learn anything at all !

