cv::ocl::Kernel run method generate exception ( error: (-215) clEnqueueReadBuffer) (OpenCV 4.1.0)
I try next:
class CustomKernelRunner
{
bool isInit = false;
cv::ocl::Context context;
cv::ocl::ProgramSource programSource;
cv::ocl::Program program;
cv::ocl::Kernel kernel;
public:
CustomKernelRunner()
{
}
bool init()
{
assert(! isInit);
using namespace std;
if (!cv::ocl::haveOpenCL())
{
cout << "OpenCL is not avaiable..." << endl;
return false;
}
if (!context.create(cv::ocl::Device::TYPE_GPU))
{
cout << "Failed creating the context on GPU..." << endl;
return false;
}
// In OpenCV 3.0.0 beta, only a single device is detected.
cout << context.ndevices() << " GPU devices are detected." << endl;
for (int i = 0; i < (int)context.ndevices(); i++)
{
cv::ocl::Device device = context.device(i);
cout << "name : " << device.name() << endl;
cout << "available : " << device.available() << endl;
cout << "imageSupport : " << device.imageSupport() << endl;
cout << "OpenCL_C_Version : " << device.OpenCL_C_Version() << endl;
cout << endl;
}
// Select the first device
cout << "select first device" << context.device(0).name() << endl;
cv::ocl::Device(context.device(0));
isInit = true;
return true;
}
// kernel.args(image, shift_x, shift_y, cv::ocl::KernelArg::ReadWrite(umat_dst));
void prepare_kernel(const char* kernelName,
const char* kernelSource)
{
assert(isInit);
programSource = cv::ocl::ProgramSource(kernelSource);
// Compile the kernel code
cv::String errmsg;
cv::String buildopt /*= cv::format("-D dstT=%s", cv::ocl::typeToStr(umat_dst.depth()))*/;
program = context.getProg(programSource, buildopt, errmsg);
kernel = cv::ocl::Kernel(kernelName, program);
assert(! kernel.empty());
}
bool run_kernel(cv::Mat mat_src, int src_arg_idx,
cv::Mat mat_dst, int dst_arg_idx,
QList<cv::ocl::KernelArg> argList)
{
assert(isInit);
// Transfer Mat data to the device
cv::UMat umat_src = mat_src.getUMat(cv::ACCESS_READ);
cv::UMat umat_dst(mat_src.size(), mat_src.type(), cv::ACCESS_WRITE);
assert((src_arg_idx < argList.size()) && (src_arg_idx >= 0));
assert((dst_arg_idx < argList.size()) && (dst_arg_idx >= 0));
argList[src_arg_idx] = cv::ocl::KernelArg::PtrReadOnly(umat_src);
argList[dst_arg_idx] = cv::ocl::KernelArg::PtrReadWrite(umat_dst);
assert((argList.size() <= 4) && (argList.size() > 0));
assert(! kernel.empty());
switch (argList.size()) {
case 1:
{
kernel.args(argList.at(0));
break;
}
case 2:
{
kernel.args(argList.at(0), argList.at(1));
break;
}
case 3:
{
kernel.args(argList.at(0), argList.at(1), argList.at(2));
break;
}
case 4:
{
kernel.args(argList.at(0), argList.at(1), argList.at(2), argList.at(3));
break;
}
default:
return false;
}
size_t globalThreads[2] = { (size_t)mat_src.cols,
(size_t)mat_src.rows};
//size_t localThreads[3] = { 16, 16, 1 };
int dims = 2; // ?
bool success = kernel.run(dims, globalThreads, NULL, true);
if (!success){
cout << "Failed running the kernel..." << endl;
return false;
}
// Download the dst data from the device (?)
mat_dst = umat_dst.getMat(cv::ACCESS_READ);
return true;
}
};
using of the class:
cv::Mat src = cv::Mat(100, 128/*divide on 8*/, CV_32F, cv::Scalar(1));
int src_width = src.cols;
cv::Mat dst = cv::Mat(100, 128/8, CV_32F, cv::Scalar(0));
DEBUG_NM(src.at<float>(0, 0));
DEBUG_NM(src.at<float>(0, 1));
if (true)
{
CustomKernelRunner ckr;
ckr.init();
ckr.prepare_kernel(kernelName.toUtf8().constData(),
kernelSrc.toUtf8().constData());
QList<ocl::KernelArg> args;
args.append(ocl::KernelArg{}); // for src
args.append(ocl::KernelArg{}); // for dst
args.append(ocl::KernelArg(ocl::KernelArg::READ_ONLY ...
add a comment