Chris3D's profile - activity

overview network karma followed questions activity

2017-10-02 01:13:01 -0600	received badge	● Notable Question (source)
2016-02-23 22:11:55 -0600	received badge	● Popular Question (source)
2012-10-17 15:24:07 -0600	commented question	cv::gpu::remap comparatively slow Yeah, that's what I'm doing already; the measurements are from actual runtime, so the first few calls don't factor in.
2012-10-17 06:32:00 -0600	asked a question	cv::gpu::remap comparatively slow Hello OpenCV-CUDA community. Because processing speed is very important for my application, I moved my LensUndistortion function from CPU to GPU processing. But the expected performance gain did not materialize; on the contrary, gpu::remap is slower than the CPU remap. My measurements for a 1280x720 image remap with linear interpolation: `i5: 9,7ms i7m: 5,8ms gts250: 23ms (upload: 2ms, remap: 17ms, download: 4ms) gtx560m: 40ms ??` The question: am I doing something wrong or is this the expected behavior? I'm using OpenCV 2.4.2 with CUDA 4.2. Here's my code: CPU: double LensUndistort(CIdarBaseFrame* frameHelper, IplImage* mapX, IplImage* mapY, CvMat* efficientMatX, CvMat* efficientMatY, int quality) { if(frameHelper == NULL || mapX == NULL || mapY == NULL || efficientMatX == NULL || efficientMatY == NULL) return -1; CHighPerformanceCounter calculations; try { calculations.Tick(); if(m_pInput == NULL) { CvSize imgSize = cvSize(frameHelper->GetWidth(), frameHelper->GetHeight()); m_pInput = cvCreateImage(imgSize, IPL_DEPTH_8U, 3); } if(m_pOutput == NULL) { CvSize imgSize = cvSize(frameHelper->GetWidth(), frameHelper->GetHeight()); m_pOutput = cvCreateImage(imgSize, IPL_DEPTH_8U, 3); } //write our data into an iplImage container //IplImage's imageData field looks like this... BGRBGR : imageData[0] = B; imageData[1] = G; and so on.. 
int imageLen = 0; m_pInput->imageData = (char)frameHelper->GetReversedImageBytes(imageLen, timeToDecode); switch (quality) { case 1: cvRemap(m_pInput, m_pOutput, efficientMatX, efficientMatY, CV_INTER_LINEAR + cv::BORDER_CONSTANT, cvScalarAll(0)); //9.7ms //this looks good, but takes a little longer than nearest neighbour interpolation break; case 2: cvRemap(m_pInput, m_pOutput, efficientMatX, efficientMatY, CV_INTER_CUBIC + cv::BORDER_CONSTANT, cvScalarAll(0)); //65ms //this looks good, but takes a lot longer than nearest neighbour interpolation break; default: //or 0 cvRemap(m_pInput, m_pOutput, efficientMatX, efficientMatY, CV_INTER_NN + cv::BORDER_CONSTANT); //8.5ms //nearest neighbour interpolation is fastest! But looks shitty :( break; } //set image frameHelper->SetReversedImageBytes((BYTE)(m_pOutput->imageData), imageLen); } catch( cv::Exception& e ) { const char* err_msg = e.what(); CString err; err.Format(_T("Error while LensUndistort(). Description: %s"), err_msg); theLog.Log(err, EVENTLOG_ERROR_TYPE, 0, 0, SERIOUS); return -1; } catch(CException* p_Ex) { TCHAR lpszError[MAX_TEMP_BUFFER]; p_Ex->GetErrorMessage(lpszError, MAX_TEMP_BUFFER); CString err; err.Format(_T("Error while LensUndistort(). 
Description: %s"), lpszError); theLog.Log(err, EVENTLOG_ERROR_TYPE, 0, 0, SERIOUS); p_Ex->Delete(); return -1; } return calculations.GetDeltaInMS(); } GPU: double LensUndistortGPU(CIdarBaseFrame* frameHelper, IplImage* mapX, IplImage* mapY, int quality) { if(frameHelper == NULL || mapX == NULL || mapY == NULL) return -1; CHighPerformanceCounter calculations; try { calculations.Tick(); cv::Size imgSize = cv::Size(frameHelper->GetWidth(), frameHelper->GetHeight()); if(m_inputGPU.data == NULL) { m_inputGPU = cv::gpu::GpuMat(imgSize, CV_8UC3); } if(m_outputGPU.data == NULL) { m_outputGPU = cv::gpu::GpuMat(imgSize, CV_8UC3); } int imageLen = 0; double timeToDecode = 0.0; cv::Mat input = cv::Mat(imgSize, CV_8UC3); input.data = (uchar*)frameHelper->GetReversedImageBytes(imageLen, timeToDecode); //gets the decoded image bytes (should take no time at all, since the image is already decoded) m_inputGPU.upload(input); cv::gpu::GpuMat matXGPU(mapX); cv::gpu::GpuMat matYGPU(mapY); switch (quality) { case 1: cv::gpu::remap(m_inputGPU, m_outputGPU, matXGPU, matYGPU, CV_INTER_LINEAR, cv::BORDER_CONSTANT); //22ms break; case 2: cv::gpu::remap(m_inputGPU, m_outputGPU, matXGPU, matYGPU, CV_INTER_CUBIC, cv::BORDER_CONSTANT); //45ms break; default: //or 0 cv::gpu::remap(m_inputGPU, m_outputGPU, matXGPU, matYGPU, CV_INTER_NN, cv::BORDER_CONSTANT); //15ms break; } cv::Mat output = cv::Mat(imgSize, CV_8UC3 ... (more)