【发布时间】:2018-08-11 15:47:47
【问题描述】:
对于我使用 OpenCL 映射缓冲区的代码是否正确,我有点困惑。
据我了解,在 OpenCL 环境中,对缓冲区使用映射/取消映射操作(即零拷贝内存机制)是主机与 GPU 共享数据最高效的方式。
我不明白为什么 res_nb 在每次迭代时都没有初始化为 0。
相反,在每次迭代中,res_nb 的值都会乘以 2。
我知道我应该做错误检查等等。
OpenCL 代码
// Counter kernel: each work-item that executes it atomically adds 1 to the
// single uint that res_nb_g points to.
//
// res_nb_g: pointer to the counter in global memory; the host is expected to
//           reset it before a launch if a per-launch count is wanted.
__kernel void test(
    __global uint *res_nb_g
)
{
    // atomic_add returns the value that was stored at res_nb_g before 1 was
    // added; that previous value is not needed here, so it is discarded.
    atomic_add(res_nb_g, 1);
}
C 代码
// Question code as posted: creates a one-element counter buffer, then 10 times tries to
// zero it, runs the kernel, and reads the counter back. No return status is checked.
cl_uint res_nb = 0;
// NOTE(review): CL_MEM_READ_ONLY declares the buffer read-only for kernels, but the test
// kernel updates it with atomic_add; CL_MEM_READ_WRITE looks like what is intended.
cl_mem res_nb_g = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, sizeof(cl_uint), &res_nb, &clStatus);
clSetKernelArg(test_kernel, 0, sizeof(res_nb_g), &res_nb_g);
for (int run = 0; run < 10; run++) {
// The pointer returned by the blocking map is dereferenced immediately, so res_nb only
// receives a copy of the buffer's current value; the mapped pointer itself is not kept.
res_nb = *((cl_uint *)clEnqueueMapBuffer(clqueue, res_nb_g, CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_uint), 0, NULL, NULL, NULL));
// This zeroes the host variable res_nb only; nothing is written through the mapping,
// so the buffer contents are not reset here.
res_nb = 0;
// NOTE(review): &res_nb is the address of the host variable, not the pointer returned by
// clEnqueueMapBuffer, which is what clEnqueueUnmapMemObject expects as its mapped_ptr.
clEnqueueUnmapMemObject(clqueue, res_nb_g, &res_nb, 0, NULL, NULL);
clEnqueueNDRangeKernel(clqueue, test_kernel, 1, NULL, &g_work_size, &l_work_size, 0, NULL, NULL);
clFinish(clqueue);
// Maps the buffer again for reading and copies the counter into res_nb.
// NOTE(review): this mapping is never unmapped inside the loop.
res_nb = *((cl_uint *)clEnqueueMapBuffer(clqueue, res_nb_g, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint), 0, NULL, NULL, &clStatus));
}
使用我的解决方案进行编辑:
// Revised host code: res_nb is now a pointer that holds the address returned by
// clEnqueueMapBuffer; the counter is reset through it, and the same pointer is handed back to
// clEnqueueUnmapMemObject. Every status code is passed to clCheckError.
cl_uint *res_nb = 0;
// Read-write buffer allocated with CL_MEM_ALLOC_HOST_PTR; no initial data is supplied
// (host_ptr is NULL), so the counter is first given a value inside the loop.
cl_mem res_nb_g = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(cl_uint), NULL, &clStatus);
clCheckError(clStatus);
clStatus = clSetKernelArg(test_kernel, 0, sizeof(res_nb_g), &res_nb_g);
clCheckError(clStatus);
for (cl_uint run = 0; run < nbruns; run++) {
// Blocking map (CL_TRUE) for writing: res_nb points at the buffer's contents on return.
res_nb = (cl_uint *)clEnqueueMapBuffer(clqueue, res_nb_g, CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_uint), 0, NULL, NULL, &clStatus);
clCheckError(clStatus);
// Reset the counter through the mapped pointer before the kernel runs.
*res_nb = 0;
// Unmap using the pointer that the map call returned.
clStatus = clEnqueueUnmapMemObject(clqueue, res_nb_g, res_nb, 0, NULL, NULL);
clCheckError(clStatus);
// Launch the kernel over a 1-dimensional range of g_work_size work-items.
clStatus = clEnqueueNDRangeKernel(clqueue, test_kernel, 1, NULL, &g_work_size, &l_work_size, 0, NULL, NULL);
clCheckError(clStatus);
clFinish(clqueue); // Not necessary (author's remark); presumably because the map below is blocking - assumes an in-order queue, TODO confirm
// Blocking map for reading: after this call *res_nb holds the counter written by the kernel.
res_nb = (cl_uint *)clEnqueueMapBuffer(clqueue, res_nb_g, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint), 0, NULL, NULL, &clStatus);
clCheckError(clStatus);
// Edit: remark @ Andrew Savonichev
// The read mapping is released as well. Any use of *res_nb belongs before this call;
// the code as posted does not read the value.
clStatus = clEnqueueUnmapMemObject(clqueue, res_nb_g, res_nb, 0, NULL, NULL);
clCheckError(clStatus);
}
【问题讨论】:
标签: c++ c performance memory-management opencl