【问题标题】:Garbage output by array of struct in OpenCL(OpenCL 中结构体数组输出垃圾值)
【发布时间】:2012-07-21 19:37:15
【问题描述】:

我是 OpenCL 的初学者,我正在尝试使以下内核正常工作。我将一个结构数组传递给内核并尝试修改它的值。我的结构声明是:

/* Number of pairt elements in the host array; also used as the global work size. */
#define LIST_SIZE 10
/* Pack the following struct to 1-byte alignment so the host layout carries no
   padding; the kernel declares its own struct pairt as packed as well. */
#pragma pack(push, 1)

/* One element of the array handed to the kernel: a pair of ints. */
typedef struct pairt {
    int a;
    int b;
} pairt;

/* Restore the packing setting that was active before the push. */
#pragma pack(pop)

我创建缓冲区以传递此结构的主机代码是:

/* Host-side array that is uploaded to the device.
   NOTE(review): p must be filled with the input values before the write below;
   the initialisation is not shown in this snippet - confirm it happens first. */
pairt p[LIST_SIZE];
/* Device buffer large enough for the whole array. */
p_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(p), NULL, &ret);
/* Blocking upload. Pass the array itself (it decays to a pointer to its first
   element) instead of &p, which has the wrong type (pointer to array). */
ret = clEnqueueWriteBuffer(command_queue, p_mem_obj, CL_TRUE, 0, sizeof(p), p, 0, NULL, NULL);

我设置内核参数的代码是:

/* Bind the device buffer as kernel argument 0. */
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&p_mem_obj);
/* One work-item per array element, in work-groups of two. */
size_t global_item_size = LIST_SIZE;
size_t local_item_size = 2;
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, &events[0]);
/* Wait for the kernel to finish, then drop the event. */
ret = clWaitForEvents(1, &events[0]);
ret = clReleaseEvent(events[0]);
/* Blocking read of the results back into the host array. */
ret = clEnqueueReadBuffer(command_queue, p_mem_obj, CL_TRUE, 0, LIST_SIZE*sizeof(struct pairt), p, 0, NULL, &events[1]);
ret = clWaitForEvents(1, &events[1]);
/* The read event was requested above, so it has to be released as well. */
ret = clReleaseEvent(events[1]);

我的内核是:

/* Device-side counterpart of the host pairt: two ints, declared packed. */
struct __attribute__ ((packed)) pairt {
    int a;
    int b;
};

/*
 * simple_diff: each work-item negates both members of one element of p.
 * p is an array in global memory, indexed by the work-item's global id in
 * dimension 0.
 */
__kernel void simple_diff( __global struct pairt* p)
{
    int gid = get_global_id(0);

    p[gid].a = -p[gid].a;
    p[gid].b = -p[gid].b;
}

我用以下值初始化数组:

1 2
3 4
5 6
7 8
9 10
11 12
13 14
15 16
17 18
19 20

但是内核返回的值是:

-298660672 -32767
0 0
-4198172 0
-298660832 -32767
-4200052 0
-1 -2
-3 -4
-5 -6
-7 -8
-9 -10

我不知道为什么会这样?

【问题讨论】:

  • 调用 clEnqueueWriteBuffer 时,不应传入 p 的地址(&p),直接传入 p 即可。

标签: arrays struct opencl


【解决方案1】:

好吧,我在 4 核 CPU 和 AMD 6950 GPU 这两种设备上,分别用 32 位和 64 位版本试了下面的代码,在我这里运行正常……

可以尝试把结构体从 #pragma pack(1) 改为默认对齐方式,或者改用 int2 向量。

/*  
Copyright (C) Tim Child 2012
All rights reserved

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <memory.h>
#include <malloc.h>

#include "CL\cl.h"

/* Number of elements in the test arrays; also used as the global work size. */
#define LIST_SIZE 10
/* Pack the following struct to 1-byte alignment so the host layout carries no
   padding; the kernel source declares its struct pairt as packed as well. */
#pragma pack(push, 1)

/* One element of the array handed to the kernel: a pair of ints. */
typedef struct pairt 
{
    int a;
    int b;
} pairt;

/* Restore the packing setting that was active before the push. */
#pragma pack(pop)

/* Same two members as pairt, but with fixed-width int32_t fields and declared
   outside the pack(1) region, i.e. with the compiler's default packing. */
typedef struct pair32 
{
    int32_t a;
    int32_t b;
} pair32;

/**
    printAligment  Print a label followed by two offsets on a tab-indented line
    @param[in] msg  Label printed on its own line
    @param[in] a    First offset to print
    @param[in] b    Second offset to print
**/
void  printAligment( const char * msg, size_t a, size_t b)
{
    printf("%s\n", msg);
    /* size_t needs %zu; %d expects an int, and passing a size_t for it is
       undefined behaviour (and prints garbage where size_t is wider than int). */
    printf( "\t%zu\t%zu\n", a, b );
}

/**
    OpenCLEnvironment  An OpenCL Environment structure

    Holds the platforms and devices enumerated by NewOpenCLEnvironment together
    with the context and command queue created for the selected device.
**/

typedef struct OpenCLEnvironment
{
    cl_platform_id *    platformId;         /* array of platformCount platform ids */
    cl_uint             platformCount;      /* number of entries in platformId and deviceCount */
    cl_device_id *      deviceId;           /* device ids of all platforms, stored back to back */
    cl_uint      *      deviceCount;        /* number of devices per platform, one entry per platform */
    cl_uint             deviceTotal;        /* sum of the per-platform device counts */
    cl_context          context;            /* context created for the selected device */
    cl_command_queue    queue;              /* command queue created on context for the selected device */
    cl_uint             currentPlatform;    /* index into platformId requested by the caller */
    cl_uint             currentDevice;      /* index into deviceId requested by the caller */

} OpenCLEnvironment;

/**
    OpenCLKernel  A kernel built from source, as produced by NewOpenCLKernel
**/
typedef struct OpenCLKernel
{
        char *      name;               /* kernel function name passed to clCreateKernel */
        cl_program  program;            /* program object created from sourceCode */
        cl_kernel   kernel;             /* kernel object; only set when the build succeeded */
        char *      sourceCode;         /* kernel source text supplied by the caller */
        char *      compilerOptions;    /* option string handed to clBuildProgram */
        char *      compilerErrors;     /* heap-allocated build log; only set when the build failed */

}  OpenCLKernel;

/**
    NewOpenCLEnvironment  Create a new OpenCL Environment

    Enumerates all platforms and all devices of every platform, then creates a
    context and a command queue for the selected device.

    @param[in]  currentPlatform  Index of the platform Id to use
    @param[in]  currentDevice    Index of the Device Id to use. The index refers
                                 to the flat device list that spans all platforms.
                                 NOTE(review): the context is created with
                                 platformId[currentPlatform] as CL_CONTEXT_PLATFORM,
                                 so the chosen device presumably has to belong to
                                 that platform - confirm against clCreateContext.

    @returns OpenCL Environment, or NULL when the structure itself cannot be
             allocated. When enumeration fails, an index is out of range or
             context/queue creation fails, the structure is still returned but
             its context and/or queue are NULL; callers must check both.
**/
OpenCLEnvironment * NewOpenCLEnvironment(cl_uint     currentPlatform, cl_uint    currentDevice)
{
    OpenCLEnvironment * environment = NULL;
    cl_int      error = CL_SUCCESS;
    cl_uint     i = 0;
    cl_uint     found = 0;
    cl_uint     offset = 0;
    /* Local rather than static: the platform slot is rewritten on every call. */
    cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, 0, 0 };

    environment = (OpenCLEnvironment *)malloc( sizeof( OpenCLEnvironment ) );
    if ( environment == NULL )
    {
        return NULL;
    }
    memset( environment, 0, sizeof( OpenCLEnvironment ) );

    /* How many platforms are there? */
    error = clGetPlatformIDs( 0, NULL, &environment->platformCount );
    if ( error != CL_SUCCESS || environment->platformCount == 0 )
    {
        environment->platformCount = 0;
        return environment;
    }

    environment->deviceCount = (cl_uint *)malloc( sizeof( cl_uint ) * environment->platformCount );
    environment->platformId = (cl_platform_id *)malloc( sizeof( cl_platform_id ) * environment->platformCount );
    if ( environment->deviceCount == NULL || environment->platformId == NULL )
    {
        goto discard_platforms;
    }
    /* A platform whose device query fails must count as "no devices", not as
       whatever malloc happened to leave in the slot. */
    memset( environment->deviceCount, 0, sizeof( cl_uint ) * environment->platformCount );

    error = clGetPlatformIDs( environment->platformCount, environment->platformId, NULL );
    if ( error != CL_SUCCESS )
    {
        goto discard_platforms;
    }

    /* Pass 1: count the devices of every platform. */
    for ( i = 0; i < environment->platformCount; i++ )
    {
        found = 0;
        error = clGetDeviceIDs( environment->platformId[i], CL_DEVICE_TYPE_ALL, 0, NULL, &found );
        if ( error == CL_SUCCESS )
        {
            environment->deviceCount[i] = found;
            environment->deviceTotal += found;
        }
    }

    if ( environment->deviceTotal > 0 )
    {
        environment->deviceId = (cl_device_id *)malloc( sizeof( cl_device_id ) * environment->deviceTotal );
    }
    if ( environment->deviceId == NULL )
    {
        /* No devices, or no memory to hold them: advertise an empty list. */
        memset( environment->deviceCount, 0, sizeof( cl_uint ) * environment->platformCount );
        environment->deviceTotal = 0;
        return environment;
    }

    /* Pass 2: fetch the device ids, platform after platform, into one array. */
    for ( i = 0; i < environment->platformCount; i++ )
    {
        if ( environment->deviceCount[i] == 0 )
        {
            continue;   /* asking for zero entries is rejected by clGetDeviceIDs */
        }

        found = 0;
        error = clGetDeviceIDs( environment->platformId[i], CL_DEVICE_TYPE_ALL, environment->deviceCount[i],
                            &environment->deviceId[offset], &found );
        if ( error != CL_SUCCESS )
        {
            environment->deviceCount[i] = 0;
            continue;
        }

        /* Never advance past the number of slots that were actually offered. */
        if ( found < environment->deviceCount[i] )
        {
            environment->deviceCount[i] = found;
        }
        offset += environment->deviceCount[i];
    }
    /* Only the entries that were really filled in may be selected below. */
    environment->deviceTotal = offset;

    if ( currentPlatform < environment->platformCount && currentDevice < environment->deviceTotal )
    {
        environment->currentPlatform = currentPlatform;
        environment->currentDevice = currentDevice;
        properties[ 1] = (cl_context_properties)environment->platformId[ environment->currentPlatform ];
        environment->context = clCreateContext( properties, 1, &environment->deviceId[ environment->currentDevice ],
            NULL, NULL, &error );

        if ( error == CL_SUCCESS )
        {
            environment->queue = clCreateCommandQueue( environment->context, environment->deviceId[ environment->currentDevice ], 0, &error );
        }
    }

    return environment;

discard_platforms:
    /* Platform enumeration failed: leave the environment in a consistent,
       empty state instead of half-initialised arrays. */
    free( environment->deviceCount );
    free( environment->platformId );
    environment->deviceCount = NULL;
    environment->platformId = NULL;
    environment->platformCount = 0;
    return environment;
}

/**
    NewOpenCLKernel  Create a new OpenCL Kernel from Source code
    @param[in] environment  OpenCL Environment
    @param[in] name Kernel Name
    @param[in] sourceCode  Kernel source code
    @param[in] compilerOptions command line option used to compile the  kernel

    @returns  a New OpenCL Kernel structure, or NULL when the structure itself
              cannot be allocated. The kernel member is NULL when the
              environment has no context, or when program creation, the build
              or kernel creation failed. After a failed build, compilerErrors
              holds the NUL-terminated build log (heap allocated, owned by the
              returned structure) if it could be retrieved.
**/
OpenCLKernel * NewOpenCLKernel( OpenCLEnvironment *  environment, char * name, char * sourceCode, char * compilerOptions )
{
    OpenCLKernel *  kernel = NULL;
    const char *    source = sourceCode;    /* clCreateProgramWithSource wants const char ** */
    cl_device_id    device;
    size_t          size = 0;
    cl_int          error = CL_SUCCESS;

    kernel = ( OpenCLKernel * ) malloc( sizeof ( OpenCLKernel ) );
    if ( kernel == NULL )
    {
        return NULL;
    }
    memset( kernel, 0, sizeof( OpenCLKernel ) );
    kernel->sourceCode = sourceCode;
    kernel->name = name;
    kernel->compilerOptions = compilerOptions;

    /* Without a usable context and device nothing can be compiled. */
    if ( environment == NULL || environment->context == NULL || environment->deviceId == NULL )
    {
        return kernel;
    }
    device = environment->deviceId[ environment->currentDevice ];

    kernel->program = clCreateProgramWithSource( environment->context, 1, &source, NULL, &error );
    if ( error != CL_SUCCESS || kernel->program == NULL )
    {
        kernel->program = NULL;
        return kernel;
    }

    error = clBuildProgram( kernel->program, 1, &device, kernel->compilerOptions, NULL, NULL );

    /* clBuildProgram returns an error code, so compare with CL_SUCCESS;
       CL_BUILD_SUCCESS is a cl_build_status value and only matched by accident. */
    if ( error == CL_SUCCESS )
    {
        kernel->kernel = clCreateKernel( kernel->program, name, &error );
        if ( error != CL_SUCCESS )
        {
            kernel->kernel = NULL;
        }
        return kernel;
    }

    /* The build failed: ask for the log size first, then fetch the log. */
    if ( clGetProgramBuildInfo( kernel->program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &size ) == CL_SUCCESS )
    {
        kernel->compilerErrors = (char *)malloc( size + 1 );
        if ( kernel->compilerErrors != NULL )
        {
            if ( clGetProgramBuildInfo( kernel->program, device, CL_PROGRAM_BUILD_LOG, size,
                    kernel->compilerErrors, NULL ) != CL_SUCCESS )
            {
                kernel->compilerErrors[ 0 ] = '\0';
            }
            /* Guarantee termination whatever the driver wrote. */
            kernel->compilerErrors[ size ] = '\0';
        }
    }

    return kernel;
}


/**
    PrintPair  Print a heading followed by one tab-indented line per pair
    @param[in] msg  Heading printed on its own line
    @param[in] p    Array of pairs to print
    @param[in] n    Number of elements of p to print
**/
void PrintPair(char * msg, pairt * p, int n)
{
    pairt * entry = p;
    int     remaining;

    printf("%s\n", msg);
    /* Walk the array with a pointer while counting down the elements left. */
    for ( remaining = n; remaining > 0; remaining--, entry++ )
    {
        printf ("\t%d\t%d\n", entry->a, entry->b );
    }
    printf( " \n" );
}

int main ( int argc, char  ** argv)
{
    cl_int      error = CL_SUCCESS;
    cl_mem      p_mem_obj;
    size_t      global_item_size = LIST_SIZE;
    size_t      local_item_size = 2;
    cl_event    events[2];

    static pairt    unaligned[] = { 
        1, 2,
        3, 4,
        5, 6,
        7, 8,
        9, 10,
        11, 12,
        13, 14,
        15, 16,
        17, 18,
        19, 20,};

    pair32  aligned[] = { 
        1, 2,
        3, 4,
        5, 6,
        7, 8,
        9, 10,
        11, 12,
        13, 14,
        15, 16,
        17, 18,
        19, 20,};

    cl_int2 vector[] = {

        1, 2,
        3, 4,
        5, 6,
        7, 8,
        9, 10,
        11, 12,
        13, 14,
        15, 16,
        17, 18,
        19, 20,};

    OpenCLEnvironment *  environment = NewOpenCLEnvironment(0, 1);



    static char * simple_diff = "struct __attribute__ ((packed)) pairt { \
    int a; \
    int b; \
}; \
 \
__kernel void simple_diff( __global struct pairt* p) \
{                                                    \
    int i = get_global_id(0); \
    __global struct pairt *tmp = &p[i]; \
    \
    tmp->a = tmp->a * -1; \
    tmp->b = tmp->b * -1; \
\
}";

    OpenCLKernel *  simpleDiff;  

    printAligment( "Pack 1 aligned", offsetof(pairt, a), offsetof(pairt, b) );
    printAligment( "Default aligned", offsetof(pair32, a), offsetof(pair32, b) );
    printAligment( "Vector aligned", offsetof(cl_int2, s[0]), offsetof(cl_int2, s[1]) );

    simpleDiff = NewOpenCLKernel(environment,  "simple_diff", simple_diff, "" );

    p_mem_obj = clCreateBuffer(environment->context, CL_MEM_READ_WRITE, LIST_SIZE*sizeof(struct pairt), NULL, &error);
    error = clEnqueueWriteBuffer(environment->queue, p_mem_obj, CL_TRUE, 0, LIST_SIZE*sizeof(struct pairt), &unaligned, 0, NULL, NULL);

    error = clSetKernelArg(simpleDiff->kernel, 0, sizeof(cl_mem), (void *)&p_mem_obj);

    error = clEnqueueNDRangeKernel( environment->queue, simpleDiff->kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, &events[0]);
    error = clWaitForEvents(1, &events[0]);
    error = clReleaseEvent(events[0]);
    error = clEnqueueReadBuffer(environment->queue, p_mem_obj, CL_TRUE, 0, LIST_SIZE*sizeof(struct pairt), unaligned, 0, NULL, &events[1]);
    error = clWaitForEvents(1, &events[1]);
    error = clReleaseEvent(events[1]);


     PrintPair( "Pack 1", unaligned, sizeof(unaligned)/sizeof(unaligned[0]) )
}

【讨论】:

    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 2018-12-10
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2019-05-06
    • 2021-08-07
    相关资源
    最近更新 更多