【发布时间】:2016-12-27 21:48:11
【问题描述】:
我编写 OpenCL 程序已经有一段时间了,现在正在尝试写一个简单的程序:读取 .pam 格式的图像,然后原样保存。
问题是:当我运行它并传入要打开的图像文件名时,它在创建第一个 2D 图像(clCreateImage2D)时返回错误 -40。
代码如下:
/* Horizontal rule printed around the benchmark table. */
#define SEPARATOR "==============================================\n"

/*
 * Copy every descriptive field of *src into *dst, leaving dst->data alone.
 * After the call both descriptors report the same width and height.
 */
void copy_img_info(imgInfo *src, imgInfo *dst)
{
    /* Geometry. */
    dst->width  = src->width;
    dst->height = src->height;

    /* Pixel format. */
    dst->channels = src->channels;
    dst->depth    = src->depth;
    dst->maxval   = src->maxval;

    /* Size bookkeeping; the pixel buffer itself (.data) is intentionally not copied. */
    dst->data_size = src->data_size;
}
/*
 * Bind src/dst as arguments 0 and 1 of kernel k_op and enqueue it on que as a
 * 2D NDRange that covers the image described by info_open.
 *
 *   _lws         side of the square work-group; any value <= 0 selects the
 *                default of 16 (a negative value used to be converted to a
 *                huge size_t and passed on as the work-group size).
 *   info_to_save unused; kept so existing callers keep compiling.
 *   num_events,
 *   wait_list    events the kernel must wait for, forwarded unchanged to
 *                clEnqueueNDRangeKernel.
 *
 * Returns the event associated with the enqueued kernel. Errors are reported
 * through ocl_check (presumably it aborts on failure -- confirm in its header).
 */
cl_event launch_op(cl_command_queue que, cl_kernel k_op,
                   imgInfo info_open, imgInfo info_to_save,
                   int _lws,
                   cl_mem src, cl_mem dst,
                   cl_int num_events, const cl_event* wait_list){
    cl_int err;
    cl_event evt_kernel;
    const size_t side = _lws > 0 ? (size_t)_lws : 16;
    size_t lws[] = { side, side };
    /* Dimension 0 spans the image height and dimension 1 the width, each
     * rounded up to a multiple of the work-group side; the kernel is expected
     * to discard the padding work-items itself. */
    size_t gws[] = {
        round_mul_up(info_open.height, lws[0]),
        round_mul_up(info_open.width, lws[1])
    };

    (void)info_to_save;

    err = clSetKernelArg(k_op, 0, sizeof(src), &src);
    ocl_check(err, "Set op k arg 0");
    err = clSetKernelArg(k_op, 1, sizeof(dst), &dst);
    ocl_check(err, "Set op k arg 1");

    err = clEnqueueNDRangeKernel(que, k_op, 2, NULL, gws, lws,
                                 num_events, wait_list, &evt_kernel);
    ocl_check(err, "Enqueue kernel op");
    return evt_kernel;
}
int main(int argc, char* argv[]) {
if( argc <2 ){
printf("Inserire nome del file\n");
exit(EXIT_FAILURE);
}
int _lws;
if( argc <3)
_lws= 0;
else{
_lws= atoi(argv[2]);
printf("Sarà usato %d come local work size\n",_lws);
}
imgInfo info_open, info_to_save;
cl_event evt_fill[2], evt_upload, evt_op, evt_download;
cl_image_format format = {
.image_channel_data_type = CL_UNSIGNED_INT16,
.image_channel_order = CL_RGBA
};
cl_int err;
cl_platform_id p = select_platform();
cl_device_id d = select_device(p);
cl_context ctx = create_context(p, d);
cl_command_queue que = create_queue(ctx, d);
cl_program prog = create_program("Kernels/image_tr.ocl",NULL , ctx, d);
if(load_pam(argv[1], &info_open)){
printf("Errore durante apertura file\n");
exit(EXIT_FAILURE);
}
copy_img_info(&info_open, &info_to_save);
/* ALLOCATION OF BUFFERS */
cl_mem image_to_open = clCreateImage2D(ctx, CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY , &format, info_open.width, info_open.height,
info_open.width, NULL, &err );
ocl_check(err, "Allocate image to open");
cl_mem image_to_save = clCreateImage2D(ctx, CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY , &format, info_to_save.width, info_to_save.height,
info_to_save.width, NULL, &err );
ocl_check(err, "Allocate image to save");
/* ALLOCATION OF BUFFERS */
/* FILL IMAGES */
const size_t fill_color[]={0,0,0,0};
const size_t origin[3]= {0,0,0};
const size_t region[3]= { info_open.width, info_open.height, 1};
err = clEnqueueFillImage(que, image_to_open, fill_color , origin, region, 0, NULL, evt_fill);
ocl_check(err, "Enqueue Fill Buffer to open");
err = clEnqueueFillImage(que, image_to_save, fill_color , origin, region, 0, NULL, evt_fill + 1);
ocl_check(err, "Enqueue Fill Buffer to save");
/* FILL IMAGES */
/* UPLOAD IMAGE ON GPU */
err = clEnqueueWriteImage(que, image_to_open, CL_TRUE, origin, region,
info_open.width, 0, info_open.data, 1, evt_fill, &evt_upload );
ocl_check(err, "Upload image on GPU");
/* UPLOAD IMAGE ON GPU */
/* CREATION OF KERNELS */
cl_kernel k_op = clCreateKernel(prog, "op", &err );
ocl_check(err, "Creation of kernel op");
/* CREATION OF KERNELS */
/* LAUNCH CUSTOMS KERNELS */
const cl_event evt_wait_list [] = {
evt_fill, evt_upload
};
evt_op = launch_op(que, k_op, info_open, info_to_save, _lws, image_to_open, image_to_save, 2, evt_wait_list);
/* LAUNCH CUSTOMS KERNELS */
/* DOWNLOAD FROM GPU */
err = clEnqueueReadImage(que, image_to_save, CL_TRUE, origin, region, info_to_save.width, 0,
info_to_save.data, 1, &evt_op, &evt_download);
ocl_check(err, "Download from device");
/* DOWNLOAD FROM GPU */
/* SAVE PAM FILE */
if(save_pam("ocl_image_tr.pam",&info_to_save))
printf("Errore salvataggio file Pam\n");
/* SAVE PAM FILE */
/* BENCHMARKING */
cl_ulong runtime_fill[2]={
runtime_ns(*evt_fill), runtime_ns(*(evt_fill+1) )
};
cl_ulong runtime_upload= runtime_ns(evt_upload);
cl_ulong runtime_op = runtime_ns(evt_op);
cl_ulong runtime_download = runtime_ns(evt_download);
printf(SEPARATOR);
printf(" Kernel \t Runtime \t Bandwidth \t GFLOPS\n");
printf(" Fill_1 \t %gms \t %gGB/s \t %g GFLOPS\n", 1.0e-6*runtime_fill[0],
(double)(info_open.height*info_open.width*sizeof(cl_short4))/(runtime_fill[0]),
(double)(info_open.height*info_open.width*sizeof(cl_short4))/(runtime_fill[0]));
printf(" Fill_2 \t %gms \t %gGB/s \t %g GFLOPS\n", 1.0e-6*runtime_fill[1],
(double)(info_to_save.height*info_to_save.width*sizeof(cl_short4))/(runtime_fill[1]),
(double)(info_to_save.height*info_to_save.width*sizeof(cl_short4))/(runtime_fill[1]));
printf(" Upload \t %gms \t %gGB/s \t %g GFLOPS\n", 1.0e-6*runtime_upload,
(double)(info_open.height*info_open.width*sizeof(cl_short4))/(runtime_upload),
(double)(info_open.height*info_open.width*sizeof(cl_short4))/(runtime_upload));
printf(" Op \t %gms \t %gGB/s \t %g GFLOPS\n", 1.0e-6*runtime_op,
(double)(info_to_save.height*info_to_save.width*sizeof(cl_short4))/(runtime_op),
(double)(info_to_save.height*info_to_save.width*sizeof(cl_short4))/(runtime_op));
printf(" Download \t %gms \t %gGB/s \t %g GFLOPS\n", 1.0e-6*runtime_download,
(double)(info_to_save.height*info_to_save.width*sizeof(cl_short4))/(runtime_download),
(double)(info_to_save.height*info_to_save.width*sizeof(cl_short4))/(runtime_download));
printf(SEPARATOR);
/* BENCHMARKING */
/* CLEANING... */
clReleaseEvent(evt_op);
clReleaseEvent(evt_download);
clReleaseEvent(*evt_wait_list);
clReleaseEvent(*(evt_wait_list+1));
clReleaseEvent(*evt_fill);
clReleaseEvent(*(evt_fill+1));
clReleaseMemObject(image_to_open);
clReleaseMemObject(image_to_save);
clReleaseDevice(d);
clReleaseKernel(k_op);
clReleaseProgram(prog);
clReleaseContext(ctx);
clReleaseCommandQueue(que);
return 0;
}
这是内核:
/*
 * Copy src into dst one pixel per work-item, using unsigned-integer reads
 * and writes with unnormalized coordinates.
 */
__kernel void op(__read_only image2d_t src, __write_only image2d_t dst){
    const sampler_t sampler_ui = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;

    /* Global dimension 1 provides the x coordinate and dimension 0 the y
     * coordinate, i.e. the NDRange is expected to be (height, width).
     * Original author's note: this mapping is meant to be the more efficient
     * one on the GPU (not verified here). */
    const int x = get_global_id(1);
    const int y = get_global_id(0);

    /* The NDRange is padded up to a multiple of the work-group size, so any
     * work-item outside the image in EITHER direction must do nothing;
     * CLK_ADDRESS_NONE makes out-of-range reads undefined. */
    if( x >= get_image_width(src) || y >= get_image_height(src) )
        return;

    const uint4 pix = read_imageui(src, sampler_ui, (int2)(x, y));
    write_imageui(dst, (int2)(x, y), pix);
}
我使用的头文件是由我的 GPGPU 老师编写的,我不能提供。程序给出的错误是:
Allocate image to open - error -40
这条消息是我在分配图像时打印的。这是怎么回事?
【问题讨论】:
-
错误 -40 是 CL_INVALID_IMAGE_SIZE,但图片尺寸只有 479x400 或 1280x900。
-
这真的是一个最小的例子吗?
-
我忘记了,我使用的是 Opencl 1.1 和 Nvidia Cuda 7.5
-
@loki 好的,它不是最小的,但它的作用真的很简单
标签: runtime-error opencl gpgpu