【发布时间】:2020-05-24 06:53:23
【问题描述】:
我从“Hands-On GPU-Accelerated Computer Vision with OpenCV and CUDA”一书中复制了两个例子来比较 CPU 和 GPU 的性能。
第一个代码:
// CPU baseline: times five cv::threshold calls (one per threshold type) on the same input image.
cv::Mat src = cv::imread("D:/Pics/Pen.jpg", 0); // Pen.jpg is a 4096 * 4096 grayscale picture.
// NOTE(review): src is not checked for an empty (failed) load before it is used.
cv::Mat result_host1, result_host2, result_host3, result_host4, result_host5;
// Record the starting tick count (ticks, not milliseconds; converted with getTickFrequency() below)
int64 work_begin = getTickCount();
cv::threshold(src, result_host1, 128.0, 255.0, cv::THRESH_BINARY);
cv::threshold(src, result_host2, 128.0, 255.0, cv::THRESH_BINARY_INV);
cv::threshold(src, result_host3, 128.0, 255.0, cv::THRESH_TRUNC);
cv::threshold(src, result_host4, 128.0, 255.0, cv::THRESH_TOZERO);
cv::threshold(src, result_host5, 128.0, 255.0, cv::THRESH_TOZERO_INV);
// Elapsed ticks for the whole batch of five threshold calls
int64 delta = getTickCount() - work_begin;
// Timer frequency (ticks per second)
double freq = getTickFrequency();
// Batches per second: one "frame" here is all five threshold calls together
double work_fps = freq / delta;
std::cout << "Performance of Thresholding on CPU: " << std::endl;
// 1 / work_fps equals delta / freq, i.e. the elapsed time in seconds
std::cout << "Time: " << (1 / work_fps) << std::endl;
std::cout << "FPS: " << work_fps << std::endl;
return 0;
第二个代码:
// GPU variant 1: the timed region covers the host-to-device upload, five cv::cuda::threshold
// calls, and the five device-to-host downloads.
cv::Mat h_img1 = cv::imread("D:/Pics/Pen.jpg", 0); // Pen.jpg is a 4096 * 4096 grayscale picture.
// NOTE(review): h_img1 is not checked for an empty (failed) load before it is used.
cv::cuda::GpuMat d_result1, d_result2, d_result3, d_result4, d_result5, d_img1;
// Record the starting tick count
// NOTE(review): upload() below is the first CUDA operation in this snippet, so the timed region
// presumably also pays one-time CUDA initialisation cost - confirm with an untimed warm-up call.
int64 work_begin = getTickCount();
d_img1.upload(h_img1);
// The output GpuMats are still empty here, so any allocation of their device memory
// presumably happens inside these calls and is included in the measurement - confirm.
cv::cuda::threshold(d_img1, d_result1, 128.0, 255.0, cv::THRESH_BINARY);
cv::cuda::threshold(d_img1, d_result2, 128.0, 255.0, cv::THRESH_BINARY_INV);
cv::cuda::threshold(d_img1, d_result3, 128.0, 255.0, cv::THRESH_TRUNC);
cv::cuda::threshold(d_img1, d_result4, 128.0, 255.0, cv::THRESH_TOZERO);
cv::cuda::threshold(d_img1, d_result5, 128.0, 255.0, cv::THRESH_TOZERO_INV);
// Copy every result back to host memory (still inside the timed region)
cv::Mat h_result1, h_result2, h_result3, h_result4, h_result5;
d_result1.download(h_result1);
d_result2.download(h_result2);
d_result3.download(h_result3);
d_result4.download(h_result4);
d_result5.download(h_result5);
// Elapsed ticks for upload + five thresholds + five downloads
int64 delta = getTickCount() - work_begin;
// Timer frequency (ticks per second)
double freq = getTickFrequency();
// Batches per second: one "frame" here is the whole upload/threshold/download sequence
double work_fps = freq / delta;
std::cout << "Performance of Thresholding on GPU: " << std::endl;
// 1 / work_fps equals delta / freq, i.e. the elapsed time in seconds
std::cout << "Time: " << (1 / work_fps) << std::endl;
std::cout << "FPS: " << work_fps << std::endl;
return 0;
一切正常,除了:
“GPU 速度低于 CPU”
第一个结果:
Performance of Thresholding on CPU:
Time: 0.0475497
FPS: 21.0306
第二个结果:
Performance of Thresholding on GPU:
Time: 0.599032
FPS: 1.66936
然后,我决定把上传和下载的时间排除在计时之外:
第三个代码:
// GPU variant 2: only the five cv::cuda::threshold calls are timed; the upload happens before
// the timer starts and the downloads happen after the results are printed.
cv::Mat h_img1 = cv::imread("D:/Pics/Pen.jpg", 0); // Pen.jpg is a 4096 * 4096 grayscale picture.
// NOTE(review): h_img1 is not checked for an empty (failed) load before it is used.
cv::cuda::GpuMat d_result1, d_result2, d_result3, d_result4, d_result5, d_img1;
// Host-to-device copy, deliberately kept outside the timed region
d_img1.upload(h_img1);
// Record the starting tick count
int64 work_begin = getTickCount();
// The output GpuMats are still empty here, so any allocation of their device memory
// presumably happens inside these calls and is included in the measurement - confirm.
// NOTE(review): each threshold variant runs exactly once, so any first-call overhead is part of
// the result - consider an untimed warm-up pass and averaging over several iterations.
cv::cuda::threshold(d_img1, d_result1, 128.0, 255.0, cv::THRESH_BINARY);
cv::cuda::threshold(d_img1, d_result2, 128.0, 255.0, cv::THRESH_BINARY_INV);
cv::cuda::threshold(d_img1, d_result3, 128.0, 255.0, cv::THRESH_TRUNC);
cv::cuda::threshold(d_img1, d_result4, 128.0, 255.0, cv::THRESH_TOZERO);
cv::cuda::threshold(d_img1, d_result5, 128.0, 255.0, cv::THRESH_TOZERO_INV);
// Elapsed ticks for the five threshold calls only
int64 delta = getTickCount() - work_begin;
// Timer frequency (ticks per second)
double freq = getTickFrequency();
// Batches per second: one "frame" here is all five threshold calls together
double work_fps = freq / delta;
std::cout << "Performance of Thresholding on GPU: " << std::endl;
// 1 / work_fps equals delta / freq, i.e. the elapsed time in seconds
std::cout << "Time: " << (1 / work_fps) << std::endl;
std::cout << "FPS: " << work_fps << std::endl;
// Device-to-host copies, performed after the measurement so they are not timed
cv::Mat h_result1, h_result2, h_result3, h_result4, h_result5;
d_result1.download(h_result1);
d_result2.download(h_result2);
d_result3.download(h_result3);
d_result4.download(h_result4);
d_result5.download(h_result5);
return 0;
但是,问题依然存在:
第三个结果:
Performance of Thresholding on GPU:
Time: 0.136095
FPS: 7.34779
我对这个问题感到困惑。
        1st          2nd          3rd
        CPU          GPU          GPU
Time:   0.0475497    0.599032     0.136095
FPS:    21.0306      1.66936      7.34779
请帮帮我。
GPU 规格:
*********************************************************
NVIDIA Quadro K2100M
Micro architecture: Kepler
Compute capability version: 3.0
CUDA Version: 10.1
*********************************************************
我的系统规格:
*********************************************************
laptop hp ZBook
CPU: Intel(R) Core(TM) i7-4910MQ CPU @ 2.90GHz 2.90 GHZ
RAM: 8.00 GB
OS: Windows 7, 64-bit, Ultimate, Service Pack 1
*********************************************************
【问题讨论】: