Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

cv::ocl::Kernel run method generate exception ( error: (-215) clEnqueueReadBuffer) (OpenCV 4.1.0)

I try next:

class CustomKernelRunner
{
    bool isInit = false;
    cv::ocl::Context context;
    cv::ocl::ProgramSource programSource;
    cv::ocl::Program program;
    cv::ocl::Kernel kernel;
public:
    CustomKernelRunner()
    {


}
bool init()
{
    assert(! isInit);

    using namespace std;
    if (!cv::ocl::haveOpenCL())
    {
        cout << "OpenCL is not avaiable..." << endl;
        return false;
    }
    if (!context.create(cv::ocl::Device::TYPE_GPU))
    {
        cout << "Failed creating the context on GPU..." << endl;
        return false;
    }
    // In OpenCV 3.0.0 beta, only a single device is detected.
    cout << context.ndevices() << " GPU devices are detected." << endl;
    for (int i = 0; i < (int)context.ndevices(); i++)
    {
        cv::ocl::Device device = context.device(i);
        cout << "name                 : " << device.name() << endl;
        cout << "available            : " << device.available() << endl;
        cout << "imageSupport         : " << device.imageSupport() << endl;
        cout << "OpenCL_C_Version     : " << device.OpenCL_C_Version() << endl;
        cout << endl;
    }

    // Select the first device
    cout << "select first device" << context.device(0).name() << endl;
    cv::ocl::Device(context.device(0));

    isInit = true;
    return true;
}

//        kernel.args(image, shift_x, shift_y, cv::ocl::KernelArg::ReadWrite(umat_dst));

void prepare_kernel(const char* kernelName,
                    const char* kernelSource)
{
    assert(isInit);

    programSource = cv::ocl::ProgramSource(kernelSource);

    // Compile the kernel code
    cv::String errmsg;
    cv::String buildopt /*= cv::format("-D dstT=%s", cv::ocl::typeToStr(umat_dst.depth()))*/; 
    program = context.getProg(programSource, buildopt, errmsg);
    kernel = cv::ocl::Kernel(kernelName, program);
    assert(! kernel.empty());
}

bool run_kernel(cv::Mat mat_src, int src_arg_idx,
                cv::Mat mat_dst, int dst_arg_idx,
                QList<cv::ocl::KernelArg> argList)
{
    assert(isInit);

    // Transfer Mat data to the device
    cv::UMat umat_src = mat_src.getUMat(cv::ACCESS_READ);
    cv::UMat umat_dst(mat_src.size(), mat_src.type(), cv::ACCESS_WRITE);

    assert((src_arg_idx < argList.size()) && (src_arg_idx >= 0));
    assert((dst_arg_idx < argList.size()) && (dst_arg_idx >= 0));
    argList[src_arg_idx] = cv::ocl::KernelArg::PtrReadOnly(umat_src);
    argList[dst_arg_idx] = cv::ocl::KernelArg::PtrReadWrite(umat_dst);

    assert((argList.size() <= 4) && (argList.size() > 0));
    assert(! kernel.empty());

    switch (argList.size()) {
    case 1:
    {
        kernel.args(argList.at(0));
        break;
    }
    case 2:
    {
        kernel.args(argList.at(0), argList.at(1));
        break;
    }
    case 3:
    {
        kernel.args(argList.at(0), argList.at(1), argList.at(2));
        break;
    }
    case 4:
    {
        kernel.args(argList.at(0), argList.at(1), argList.at(2), argList.at(3));
        break;
    }
    default:
        return false;
    }

    size_t globalThreads[2] = { (size_t)mat_src.cols,
                                (size_t)mat_src.rows};
    //size_t localThreads[3] = { 16, 16, 1 };
    int dims = 2; // ?
    bool success = kernel.run(dims, globalThreads, NULL, true);
    if (!success){
        cout << "Failed running the kernel..." << endl;
        return false;
    }

    // Download the dst data from the device (?)
    mat_dst = umat_dst.getMat(cv::ACCESS_READ);

    return true;
}

};

using of the class:

cv::Mat src = cv::Mat(100, 128/*divide on 8*/, CV_32F, cv::Scalar(1));
    int src_width = src.cols;

    cv::Mat dst  = cv::Mat(100, 128/8, CV_32F, cv::Scalar(0));

    DEBUG_NM(src.at<float>(0, 0));
    DEBUG_NM(src.at<float>(0, 1));

    if (true)
    {
        CustomKernelRunner ckr;
        ckr.init();
        ckr.prepare_kernel(kernelName.toUtf8().constData(),
                           kernelSrc.toUtf8().constData());

        QList<ocl::KernelArg> args;

        args.append(ocl::KernelArg{}); // for src

        args.append(ocl::KernelArg{}); // for dst

        args.append(ocl::KernelArg(ocl::KernelArg::READ_ONLY, 0, 1, 1, &src_width, sizeof(src_width)));

        args.append(ocl::KernelArg::Local()); // size??
        //    args.append(ocl::KernelArg(ocl::KernelArg::LOCAL, 0, 1, 1, 0, /*local group size?*/)); // size

        ckr.run_kernel(src, 0, dst, 1, args);
    }

```

But result:

1 GPU devices are detected.
name                 : GeForce GT 1030
available            : 1
imageSupport         : 1
OpenCL_C_Version     : OpenCL C 1.2 

select first deviceGeForce GT 1030
OpenCV Error: Assertion failed (clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS) in map, file D:\Qt\Projects\TubesScanner\soft\OpenCV\opencv-3.2.0\modules\core\src\ocl.cpp, line 4773
terminate called after throwing an instance of 'cv::Exception'
  what():  D:\Qt\Projects\TubesScanner\soft\OpenCV\opencv-3.2.0\modules\core\src\ocl.cpp:4773: error: (-215) clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS in function map