Basically there seems to be no way to use custom kernels with OCL except the openCLExecuteKernelInterop()
function which has no documentation and is tagged as "Deprecated"!
I also tried the code that abarral suggested in his question but i couldn't make it run due an unknown error in openCLExecuteKernelInterop(according to gdb).
So is there a way to avoid writing 10K lines of host code for a simple kernel?
In the case that answer is no, is it possible to use oclMat with OpenCL and avoid manually handling memory buffers and just use .download()
& .upload()
methods?
Edit: This is the exact code that abarral has used in his question:
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ocl/ocl.hpp>
int main(int _argc, const char** _argv)
{
cv::ocl::DevicesInfo devInfo;
int res = cv::ocl::getOpenCLDevices(devInfo);
if(res == 0)
{
std::cout << "There is no OPENCL Here !" << std::endl;
}else
{
for(int i = 0 ; i < devInfo.size() ; ++i)
{
std::cout << "Device : " << devInfo[i]->deviceName
<< " is present" << std::endl;
}
}
cv::ocl::setDevice(devInfo[0]); // select device to use
std::cout << CV_VERSION_EPOCH << "."
<< CV_VERSION_MAJOR << "." << CV_VERSION_MINOR << std::endl;
const char *KernelSource = "\n" \
"__kernel void square( \n" \
" __global uchar* input, \n" \
" __global uchar* output) \n" \
"{ \n" \
" int i = get_global_id(0); \n" \
" output[i] = input[i] * input[i]; \n" \
"}\n";
cv::ocl::ProgramSource src("square", KernelSource);
std::size_t globalThreads[3]={1,0,0};
std::size_t localThreads[3]={5,5,0};
cv::ocl::oclMat source(cv::Size(500,500), CV_8UC1);
cv::ocl::oclMat dest(cv::Size(500,500), CV_8UC1);
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( 4, (void *) &source.data ));
args.push_back( std::make_pair( 4, (void *) &dest.data ));
cv::ocl::openCLExecuteKernelInterop(cv::ocl::Context::getContext(), src, "square",
globalThreads, localThreads, args, -1, -1, "");
return 0;
}
It gets terminated by "receiving signal 8 (Floating point exception)".
GDB brings:
Program received signal SIGFPE,
Arithmetic exception.
0x00007ffff469cc6b in
cv::ocl::openCLExecuteKernelInterop(cv::ocl::Context,
cv::ocl::ProgramSource const&,
std::string, unsigned long, unsigned
long, std::vector<std::pair<unsigned long,="" void="" const*="">,
std::allocator<std::pair<unsigned long,="" void="" const*=""> > >&, int, int,
char const) () from
/usr/lib/libopencv_ocl.so.2.4