ocl-facedetect sample - CL_BUILD_PROGRAM_FAILURE in function openCLGetKernelFromSource building kernel for calc_sub_hist_D0

asked 2013-10-07 13:35:46 -0500

updated 2013-10-07 13:54:42 -0500

On Ubuntu 12.04 LTS with an NVIDIA GeForce 8 series GPU card, I am trying to run the ocl-facedetect sample of OpenCV and am seeing the following error:

clBuildProgram status=-11 BUILD LOG (0) Error: unsupported operation OpenCV Error: Gpu API call (CL_BUILD_PROGRAM_FAILURE) in openCLGetKernelFromSource, file /home/ovuser/OpenCV/opencv-, line 664 terminate called after throwing an instance of 'cv::Exception' what(): /home/ovuser/OpenCV/opencv- error: (-217) CL_BUILD_PROGRAM_FAILURE in function openCLGetKernelFromSource

It seems the OpenCL compiler is unable to build one of the kernels required by the face detect sample. I have added some verbose messages, and below is the kernel source causing the problem. Can someone point out what's wrong with the kernel source?

The source is unreadable as pasted, so I have attached a JPEG of it too.

./ocl-example-facedetect -t haarcascade_frontalface_alt.xml -i friends.jpg

---- verbose dump ---

C:\fakepath\ocl-example-facedetect.jpg ~~~~ Loading calc_sub_hist_D0 Building source:./calc_sub_hist_D0_GeForce 8600 GT .clb clCreateProgramWithSource status=0 source=#define PARTIAL_HISTOGRAM256_COUNT (256)

#define HISTOGRAM256_BIN_COUNT (256)



#define NBANKS (16)

#define NBANKS_BIT (4)

__kernel __attribute__((reqd_work_group_size(HISTOGRAM256_BIN_COUNT,1,1)))void calc_sub_hist_D0( __global const uint4* src, int src_step, int src_offset, __global int* globalHist, int dataCount, int cols, int inc_x, int inc_y, int hist_step)
{
    __local int subhist[(HISTOGRAM256_BIN_COUNT << NBANKS_BIT)];
    int gid = get_global_id(0);
    int lid = get_local_id(0);
    int gx = get_group_id(0);
    int gsize = get_global_size(0);
    int lsize = get_local_size(0);
    const int shift = 8;
    const int mask = HISTOGRAM256_BIN_COUNT-1;
    int offset = (lid & (NBANKS-1));
    uint4 data, temp1, temp2, temp3, temp4;
    src += src_offset;
    for(int i=0, idx=lid; i<(NBANKS >> 2); i++, idx += lsize)
    {
        subhist[idx] = 0;
        subhist[idx+=lsize] = 0;
        subhist[idx+=lsize] = 0;
        subhist[idx+=lsize] = 0;
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    int y = gid/cols;
    int x = gid - mul24(y, cols);
    for(int idx=gid; idx<dataCount; idx+=gsize)
    {
        data = src[mad24(y, src_step, x)];
        temp1 = ((data & mask) << NBANKS_BIT) + offset;
        data >>= shift;
        temp2 = ((data & mask) << NBANKS_BIT) + offset;
        data >>= shift;
        temp3 = ((data & mask) << NBANKS_BIT) + offset;
        data >>= shift;
        temp4 = ((data & mask) << NBANKS_BIT) + offset;
        atomic_inc(subhist + temp1.x);
        atomic_inc(subhist + temp1.y);
        atomic_inc(subhist + temp1.z);
        atomic_inc(subhist + temp1.w);
        atomic_inc(subhist + temp2.x);
        atomic_inc(subhist + temp2.y);
        atomic_inc(subhist + temp2.z);
        atomic_inc(subhist + temp2.w);
        atomic_inc(subhist + temp3.x);
        atomic_inc(subhist + temp3.y);
        atomic_inc(subhist + temp3.z);
        atomic_inc(subhist + temp3.w);
        atomic_inc(subhist + temp4.x);
        atomic_inc(subhist + temp4.y);
        atomic_inc(subhist + temp4.z);
        atomic_inc(subhist + temp4.w);
        x += inc_x;
        int off = ((x>=cols) ? -1 : 0);
        x = mad24(off, cols, x);
        y += inc_y - off;
    }
    barrier(CLK_LOCAL_MEM_FENCE);
    int bin1=0, bin2=0, bin3=0, bin4=0;
    for(int i=0; i<NBANKS; i+=4)
    {
        bin1 += subhist[(lid << NBANKS_BIT) + i];
        bin2 += subhist[(lid << NBANKS_BIT) + i+1];
        bin3 += subhist[(lid << NBANKS_BIT) + i+2];
        bin4 += subhist[(lid << NBANKS_BIT) + i+3];
    }
    globalHist[mad24(gx, hist_step, lid)] = bin1+bin2+bin3+bin4;
}
__kernel void __attribute__((reqd_work_group_size ... (more)

