1 | initial version |
You can try using pointers, OpenCV functions, or a ParallelLoopBody. The fastest method will depend on your configuration (image type 16SC or 16UC, image size, and your computer):
int main(int argc, char* argv[]) { int type=CV_16SC1; Mat x(1000,1023,type); for (int i=0;i<x.rows;i++) for="" (int="" j="0;j<x.cols;j++)" x.at<short="">(i,j)=rand()%32767;
vector<Mat> m(3);
m[2] = Mat::zeros (x.rows,x.cols,CV_8UC1);
Mat lsb=Mat::zeros (x.rows,x.cols,type),msb=Mat::zeros (x.rows,x.cols,type),result=Mat::zeros(x.rows,x.cols,CV_8UC3);
// That's only for fun : compile ocl function speed time is equal to compiletime
int64 tpsIni = getTickCount();
bitwise_and(x,0xFF,lsb);
lsb.convertTo(m[1],CV_8UC1,1);
x.convertTo(m[0],CV_8UC1,1./256);
merge(m,result);
int64 tpsFin = getTickCount();
// Time using pointer
result=Mat::zeros(x.rows,x.cols,CV_8UC3);
tpsIni = getTickCount();
if (type==CV_16UC1)
for (int i = 0; i < x.rows; i++)
{
ushort *pts = (ushort*)x.ptr(i);
uchar *ptc = (uchar*)result.ptr(i);
for (int j=0;j<x.cols;j++,pts++,ptc++)
{
*ptc++= *pts/256;
*ptc++ = (*pts &0xFF);
}
}
else
for (int i = 0; i < x.rows; i++)
{
short *pts = (short*)x.ptr(i);
uchar *ptc = (uchar*)result.ptr(i);
for (int j=0;j<x.cols;j++,pts++,ptc++)
{
*ptc++= *pts/256;
*ptc++ = (*pts &0xFF);
}
}
tpsFin = getTickCount();
cout << result.at<Vec3b>(64,32)<<endl;
cout << "Time pointer ="<<tpsFin-tpsIni<<"\n";
imshow("color(pointer)",result);
// Time using opencv fucntion with opencl
tpsIni = getTickCount();
{
bitwise_and(x,0xFF,lsb);
lsb.convertTo(m[1],CV_8UC1);
x.convertTo(m[0],CV_8UC1,1./256);
merge(m,result);
}
tpsFin = getTickCount();
imshow("color(ocv)",result);
cout << result.at<Vec3b>(64,32)<<endl;
cout << "Time opencv function with ocl ="<<tpsFin-tpsIni<<"\n";
Parallel16BisTo2X8Bits p(x,result);
cout << getNumThreads()<<endl;
tpsIni = getTickCount();
parallel_for_(Range(0,x.rows), p,4);
tpsFin = getTickCount();
imshow("color(parallel)",result);
cout << result.at<Vec3b>(64,32)<<endl;
cout << "Time opencv function with parallel ="<<tpsFin-tpsIni<<"\n";
waitKey();
2 | No.2 Revision |
You can try using pointers, OpenCV functions, or a ParallelLoopBody. The fastest method will depend on your configuration (image type 16SC or 16UC, image size, and your computer):
class Parallel16BisTo2X8Bits: public ParallelLoopBody
{
private:
Mat &x;
Mat &result;
bool verbose;
public:
Parallel16BisTo2X8Bits(Mat& src, Mat &dst):
x(src),
result(dst),
verbose(false)
{}
void Verbose(bool b){verbose=b;}
virtual void operator()(const Range& range) const
{
if (verbose)
cout << getThreadNum()<<"# :Start from row " << range.start << " to " << range.end-1<<" ("<<range.end-range.start<<" loops)" << endl;
if (x.type()==CV_16SC1)
for (int i = range.start; i < range.end; i++)
{
short *pts = (short*)x.ptr(i);
uchar *ptc = result.ptr(i);
for (int j=0;j<x.cols;j++,pts++)
{
*ptc++= *pts/256;
*ptc++ = (*pts &0xFF);
*ptc++=0;
}
}
else
for (int i = range.start; i < range.end; i++)
{
ushort *pts = (ushort*)x.ptr(i);
uchar *ptc = result.ptr(i);
for (int j=0;j<x.cols;j++,pts++)
{
*ptc++= *pts/256;
*ptc++ = (*pts &0xFF);
*ptc++=0;
}
}
}
Parallel16BisTo2X8Bits& operator=(const Parallel16BisTo2X8Bits &) {
return *this;
};
};
int main(int argc, char* argv[])
{