【发布时间】:2019-05-16 09:28:05
【问题描述】:
这是使用CUDA内核和opencv读取和显示图像来翻转图像的代码,在main函数中,显示输入的图片,但输出显示为黑色窗口。顺便说一句,代码没有错误,它可以编译和运行,但输出看起来很奇怪。以下是我目前尝试过的。
#include< iostream>
#include< cstdio>
#include < opencv2/core.hpp>
#include < opencv2/imgcodecs.hpp>
#include < opencv2/highgui.hpp>
#include< cuda_runtime.h >
using std::cout;
using std::endl;
__global__ void mirror( unsigned char* input, unsigned char* output, int numRows, int numCols)
{
//2D Index of current thread
const int col = blockIdx.x * blockDim.x + threadIdx.x;
const int row = blockIdx.y * blockDim.y + threadIdx.y;
if ( col >= numCols || row >= numRows ) return;
int thread_x = blockDim.x * blockIdx.x + threadIdx.x;
int thread_y = blockDim.y * blockIdx.y + threadIdx.y;
int thread_x_new = numCols-thread_x;
int thread_y_new = thread_y;
int mId = thread_y * numCols + thread_x;
int mId_new = thread_y_new * numCols + thread_x_new;
output[mId_new] = input[mId];
}
void convert_to_mirror(const cv::Mat& input, cv::Mat& output,int numrows,int numcols)
{
const dim3 blockSize(1024,1,1);
int a=numcols/blockSize.x, b=numrows/blockSize.y;
const dim3 gridSize(a+1,b+1,1);
const size_t numPixels = numrows * numcols;
unsigned char *d_input, *d_output;
cudaMalloc<unsigned char>(&d_input, numPixels);
cudaMalloc<unsigned char>(&d_output,numPixels);
//Copy data from OpenCV input image to device memory
cudaMemcpy(d_input,input.ptr(), numPixels,cudaMemcpyHostToDevice);
//Call mirror kernel.
mirror<<<gridSize, blockSize>>>(d_input,d_output, numrows, numcols);
cudaDeviceSynchronize();
//copy output from device to host
cudaMemcpy(output.ptr(), d_output,numPixels, cudaMemcpyDeviceToHost);
cudaFree(d_input);
cudaFree(d_output);
}
int main()
{
//Read input image from the disk
cv::Mat input = cv::imread("C:/a.jpg", cv::IMREAD_COLOR);
const int rows = input.rows;
const int cols = input.cols;
if(input.empty())
{
std::cout<<"Image Not Found!"<<std::endl;
std::cin.get();
return -1;
}
//Create output image
cv::Mat output(rows,cols,CV_8UC3);
//Call the wrapper function
convert_to_mirror(input,output,rows,cols);
//Show the input and output
cv::imshow("Input",input);
cv::imshow("Output",output);
//Wait for key press
cv::waitKey();
return 0;
}
【问题讨论】:
-
您是否尝试过编写一个完全不调用 CUDA 的
mirror<<<>>>的仅 CPU 实现?我相信您的问题可以归结为不正确的 CUDA 使用(在这种情况下 MCVE 不会包含 OpenCV)或不正确的 OpenCV 使用(在这种情况下 MCVE 不会包含任何 CUDA)。或者两者都有,在这种情况下,您最终会遇到两个问题,而不是一个。