Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

CUDA Canny Edge Detector is slower than cv::Canny

Hello there.

This is my first post here. I have started learning OpenCV and its CUDA capabilities. I've written a simple program that reads an input image, resizes it, detects edges with both cv::Canny and the CUDA Canny Edge Detector object, and logs the results to .txt files. My image is 960x585 and 66.3 KB. I used the C++ standard chrono library to measure the time spent in the edge detection step, and the results show that the time spent on the GPU is far greater than the time spent on the CPU. My code and results are given below. Are my results normal, or am I doing something very wrong?

#include <stdio.h>
#include <opencv2\core\core.hpp>
#include <opencv2\core\cuda.hpp>
#include <opencv2\imgproc.hpp>
#include <opencv2\opencv.hpp>
#include <chrono>
#include <fstream>


#define SIZE 25

int main()
{
    cv::Mat ImageHost = cv::imread("C:\\Users\\Heisenberg\\Desktop\\revan.jpg", CV_LOAD_IMAGE_GRAYSCALE);



        cv::Mat ImageHostArr[SIZE];

        cv::cuda::GpuMat ImageDev;
        cv::cuda::GpuMat ImageDevArr[SIZE];

        ImageDev.upload(ImageHost);


        for (int n = 1; n < SIZE; n++)
            cv::resize(ImageHost, ImageHostArr[n], cv::Size(), 0.5*n, 0.5*n, CV_INTER_LINEAR);


        for (int n = 1; n < SIZE; n++)
            cv::cuda::resize(ImageDev, ImageDevArr[n], cv::Size(), 0.5*n, 0.5*n, CV_INTER_LINEAR); 

        cv::Mat Detected_EdgesHost[SIZE];
        cv::cuda::GpuMat Detected_EdgesDev[SIZE];

        std::ofstream File1, File2;

        File1.open("C:\\Users\\Heisenberg\\Desktop\\canny_cpu.txt");
        File2.open("C:\\Users\\Heisenberg\\Desktop\\canny_gpu.txt");


        std::cout << "Process started... \n" << std::endl;
        for (int n = 1; n < SIZE; n++) {
            auto start = std::chrono::high_resolution_clock::now();
            cv::Canny(ImageHostArr[n], Detected_EdgesHost[n], 2.0, 100.0, 3, false);
            auto finish = std::chrono::high_resolution_clock::now();
            std::chrono::duration<double> elapsed_time = finish - start;
            File1 << "Image Size: " << ImageHostArr[n].rows* ImageHostArr[n].cols << "  " << "Elapsed Time: " << elapsed_time.count() * 1000 << " msecs" << "\n" << std::endl;
        }


        cv::Ptr<cv::cuda::CannyEdgeDetector> canny_edg = cv::cuda::createCannyEdgeDetector(2.0, 100.0, 3, false);   



        for (int n = 1; n < SIZE; n++) {
            auto start = std::chrono::high_resolution_clock::now();
            canny_edg->detect(ImageDevArr[n], Detected_EdgesDev[n]);
            auto finish = std::chrono::high_resolution_clock::now();
            std::chrono::duration<double> elapsed_time = finish - start;
            File2 << "Image Size: " << ImageDevArr[n].rows* ImageDevArr[n].cols << "  " << "Elapsed Time: " << elapsed_time.count() * 1000 << " msecs" << "\n" << std::endl;
        }
        std::cout << "Process ended... \n" << std::endl;



    return 0;
}

CUDA Canny Edge Detector is slower than cv::Canny

Hello there.

This is my first post here. I have started learning OpenCV and its CUDA capabilities. I've written a simple program that reads an input image, resizes it, detects edges with both cv::Canny and the CUDA Canny Edge Detector object, and logs the results to .txt files. My image is 960x585 and 66.3 KB. I used the C++ standard chrono library to measure the time spent in the edge detection step, and the results show that the time spent on the GPU is far greater than the time spent on the CPU. My code and results are given below. Are my results normal, or am I doing something very wrong?

Laptop specs;

8 GB RAM

Intel i7-4700MQ CPU 2.40 GHz

NVIDIA Geforce GT 745M GPU

#include <stdio.h>
#include <opencv2\core\core.hpp>
#include <opencv2\core\cuda.hpp>
#include <opencv2\imgproc.hpp>
#include <opencv2\opencv.hpp>
#include <chrono>
#include <fstream>


#define SIZE 25

int main()
{
    cv::Mat ImageHost = cv::imread("C:\\Users\\Heisenberg\\Desktop\\revan.jpg", CV_LOAD_IMAGE_GRAYSCALE);



        cv::Mat ImageHostArr[SIZE];

        cv::cuda::GpuMat ImageDev;
        cv::cuda::GpuMat ImageDevArr[SIZE];

        ImageDev.upload(ImageHost);


        for (int n = 1; n < SIZE; n++)
            cv::resize(ImageHost, ImageHostArr[n], cv::Size(), 0.5*n, 0.5*n, CV_INTER_LINEAR);


        for (int n = 1; n < SIZE; n++)
            cv::cuda::resize(ImageDev, ImageDevArr[n], cv::Size(), 0.5*n, 0.5*n, CV_INTER_LINEAR); 

        cv::Mat Detected_EdgesHost[SIZE];
        cv::cuda::GpuMat Detected_EdgesDev[SIZE];

        std::ofstream File1, File2;

        File1.open("C:\\Users\\Heisenberg\\Desktop\\canny_cpu.txt");
        File2.open("C:\\Users\\Heisenberg\\Desktop\\canny_gpu.txt");


        std::cout << "Process started... \n" << std::endl;
        for (int n = 1; n < SIZE; n++) {
            auto start = std::chrono::high_resolution_clock::now();
            cv::Canny(ImageHostArr[n], Detected_EdgesHost[n], 2.0, 100.0, 3, false);
            auto finish = std::chrono::high_resolution_clock::now();
            std::chrono::duration<double> elapsed_time = finish - start;
            File1 << "Image Size: " << ImageHostArr[n].rows* ImageHostArr[n].cols << "  " << "Elapsed Time: " << elapsed_time.count() * 1000 << " msecs" << "\n" << std::endl;
        }


        cv::Ptr<cv::cuda::CannyEdgeDetector> canny_edg = cv::cuda::createCannyEdgeDetector(2.0, 100.0, 3, false);   



        for (int n = 1; n < SIZE; n++) {
            auto start = std::chrono::high_resolution_clock::now();
            canny_edg->detect(ImageDevArr[n], Detected_EdgesDev[n]);
            auto finish = std::chrono::high_resolution_clock::now();
            std::chrono::duration<double> elapsed_time = finish - start;
            File2 << "Image Size: " << ImageDevArr[n].rows* ImageDevArr[n].cols << "  " << "Elapsed Time: " << elapsed_time.count() * 1000 << " msecs" << "\n" << std::endl;
        }
        std::cout << "Process ended... \n" << std::endl;



    return 0;
}

CUDA Canny Edge Detector is slower than cv::Canny

Hello there.

This is my first post here. I have started learning OpenCV and its CUDA capabilities. I've written a simple program that reads an input image, resizes it, detects edges with both cv::Canny and the CUDA Canny Edge Detector object, and logs the results to .txt files. My image is 960x585 and 66.3 KB. I used the C++ standard chrono library to measure the time spent in the edge detection step, and the results show that the time spent on the GPU is far greater than the time spent on the CPU. My code and results are given below. Are my results normal, or am I doing something very wrong?

Laptop specs;

8 GB RAM

Intel i7-4700MQ CPU 2.40 GHz

NVIDIA Geforce GT 745M GPU

#include <stdio.h>
#include <opencv2\core\core.hpp>
#include <opencv2\core\cuda.hpp>
#include <opencv2\imgproc.hpp>
#include <opencv2\opencv.hpp>
#include <chrono>
#include <fstream>


#define SIZE 25

int main()
{
    cv::Mat ImageHost = cv::imread("C:\\Users\\Heisenberg\\Desktop\\revan.jpg", CV_LOAD_IMAGE_GRAYSCALE);



        cv::Mat ImageHostArr[SIZE];

        cv::cuda::GpuMat ImageDev;
        cv::cuda::GpuMat ImageDevArr[SIZE];

        ImageDev.upload(ImageHost);


        for (int n = 1; n < SIZE; n++)
            cv::resize(ImageHost, ImageHostArr[n], cv::Size(), 0.5*n, 0.5*n, CV_INTER_LINEAR);


        for (int n = 1; n < SIZE; n++)
            cv::cuda::resize(ImageDev, ImageDevArr[n], cv::Size(), 0.5*n, 0.5*n, CV_INTER_LINEAR); 

        cv::Mat Detected_EdgesHost[SIZE];
        cv::cuda::GpuMat Detected_EdgesDev[SIZE];

        std::ofstream File1, File2;

        File1.open("C:\\Users\\Heisenberg\\Desktop\\canny_cpu.txt");
        File2.open("C:\\Users\\Heisenberg\\Desktop\\canny_gpu.txt");


        std::cout << "Process started... \n" << std::endl;
        for (int n = 1; n < SIZE; n++) {
            auto start = std::chrono::high_resolution_clock::now();
            cv::Canny(ImageHostArr[n], Detected_EdgesHost[n], 2.0, 100.0, 3, false);
            auto finish = std::chrono::high_resolution_clock::now();
            std::chrono::duration<double> elapsed_time = finish - start;
            File1 << "Image Size: " << ImageHostArr[n].rows* ImageHostArr[n].cols << "  " << "Elapsed Time: " << elapsed_time.count() * 1000 << " msecs" << "\n" << std::endl;
        }


        cv::Ptr<cv::cuda::CannyEdgeDetector> canny_edg = cv::cuda::createCannyEdgeDetector(2.0, 100.0, 3, false);   



        for (int n = 1; n < SIZE; n++) {
            auto start = std::chrono::high_resolution_clock::now();
            canny_edg->detect(ImageDevArr[n], Detected_EdgesDev[n]);
            auto finish = std::chrono::high_resolution_clock::now();
            std::chrono::duration<double> elapsed_time = finish - start;
            File2 << "Image Size: " << ImageDevArr[n].rows* ImageDevArr[n].cols << "  " << "Elapsed Time: " << elapsed_time.count() * 1000 << " msecs" << "\n" << std::endl;
        }
        std::cout << "Process ended... \n" << std::endl;



    return 0;
}