【问题标题】:OpenACC: How can I keep data between different calls of a function? OpenACC 如何在函数的不同调用之间保留数据?
【发布时间】:2020-08-21 04:03:30
【问题描述】:

我正在尝试使用 OpenACC 优化应用程序。总的来说,我有一个这种类型的迭代循环:

while(t<tstop){

 add(&data, nx);

}

其中 data 是 Data 类型的变量,该类型由如下结构定义:

typedef struct Data_{   
  double *x;    /* array updated by add(); its length is passed to add() separately as n */
}Data;

我在 while 循环中调用的函数是可并行化的,但我无法做到的是在函数的不同调用之间在设备内存中维护数组 x[]。

/* Adds 1.0 to each of the first n elements of data->x inside an OpenACC parallel loop. */
void add(Data *data, int n){

  /* Structured data regions: their scope is the structured block that follows,
     so they are entered and left again on every call of add(). */
  #pragma acc data pcopy(data[0:1])
  #pragma acc data pcopy(data->x[0:n])

  #pragma acc parallel loop
  for(int i=0; i < n ; i++){
    data->x[i] += 1.;
  }
  /* NOTE(review): no matching "enter data" is visible in this function for the two
     "exit data" directives below. Per the answer further down, copying out the
     struct itself overwrites the host address in x with the device address. */
  #pragma acc exit data copyout(data->x[0:n])
  #pragma acc exit data copyout(data[0:1])
}

我知道这个程序似乎没有意义,但我只是写了一些东西来重现我在真实代码中遇到的问题。

我尝试使用非结构化数据区域:

#pragma acc enter data copyin(data[0:1])
#pragma acc enter data copyin(data->x[0:n])

#pragma acc data present(data[:1], data->x[:n])
#pragma acc parallel loop
  for(int i=0; i < n ; i++){
    data->x[i] += 1.;
  }

#pragma acc exit data copyout(data->x[0:n])
#pragma acc exit data copyout(data[0:1])

但由于某种原因,我收到了这种类型的错误:

FATAL ERROR: variable in data clause is partially present on the device: name=data(致命错误:数据子句中的变量在设备上仅部分存在:name=data)

【问题讨论】:

    标签: struct gnu-parallel openacc


    【解决方案1】:

    我无法用您提供的代码片段重现“部分存在”(partially present)错误,因此不清楚它为什么会发生。通常,当 present 表(运行时记录设备上已存在数据的表)中变量的大小与数据子句中使用的大小不同时,就会出现该错误。如果您能提供一个可重现的例子,我可以看看并确定它在这里发生的原因。

    要回答标题中的问题:设备变量可以在其所属数据区域范围内的任何位置访问,甚至可以跨子例程访问。对于非结构化数据区域(即 enter data/exit data 指令),范围是在运行时由 enter 和 exit 调用之间的区间定义的。对于结构化数据区域,范围由结构化块定义。

    这是一个使用您在上面定义的结构的示例(尽管我已将 x 的大小作为结构的一部分)。

    % cat test.c
    #include <stdio.h>
    #include <stdlib.h>
    
    
    typedef struct Data_{
      double *x;   /* array of n doubles, allocated by the caller */
      int n;       /* number of elements in x */
    }Data;
    
    void add(Data *data){
    /* Add 1.0 to every element of data->x; data is expected to be present on the device already. */
    #pragma acc parallel loop present(data)
      for (int k = 0; k < data->n; ++k) {
        data->x[k] = data->x[k] + 1.0;
      }
    }
    
    int main (void) {
       /* Host setup: one Data holding 64 doubles with x[i] = i; a failed allocation ends the run. */
       Data *data = malloc(sizeof *data);
       if (data == NULL) { perror("malloc"); return EXIT_FAILURE; }
       data->n = 64;
       data->x = malloc(sizeof *data->x * (size_t) data->n);
       if (data->x == NULL) { perror("malloc"); free(data); return EXIT_FAILURE; }
       for(int i=0; i < data->n ; i++){ data->x[i] = (double) i; }
       /* Unstructured data region: copy in the struct first, then the array it points to. */
       /* Both remain on the device across the call to add() until the exit data directives. */
    #pragma acc enter data copyin(data[0:1])
    #pragma acc enter data copyin(data->x[0:data->n])
       add(data);
    #pragma acc exit data copyout(data->x[0:data->n])
    #pragma acc exit data delete(data)
       /* Only x is copied back; the struct is deleted rather than copied out, which keeps the host x pointer intact. */
       for(int i=0; i < data->n ; i++){
          printf("%d:%f\n",i,data->x[i]);
       }
       free(data->x);
       free(data);
       return EXIT_SUCCESS;
    }
    % pgcc test.c -ta=tesla -Minfo=accel; a.out
    add:
         12, Generating present(data[:])
             Generating Tesla code
             13, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
    main:
         28, Generating enter data copyin(data[:1])
         29, Generating enter data copyin(data->x[:data->n])
         31, Generating exit data copyout(data->x[:data->n])
         32, Generating exit data delete(data[:1])
    0:1.000000
    1:2.000000
    2:3.000000
    3:4.000000
    4:5.000000
    5:6.000000
    6:7.000000
    7:8.000000
    8:9.000000
    9:10.000000
    10:11.000000
    11:12.000000
    12:13.000000
    13:14.000000
    14:15.000000
    15:16.000000
    16:17.000000
    17:18.000000
    18:19.000000
    19:20.000000
    20:21.000000
    21:22.000000
    22:23.000000
    23:24.000000
    24:25.000000
    25:26.000000
    26:27.000000
    27:28.000000
    28:29.000000
    29:30.000000
    30:31.000000
    31:32.000000
    32:33.000000
    33:34.000000
    34:35.000000
    35:36.000000
    36:37.000000
    37:38.000000
    38:39.000000
    39:40.000000
    40:41.000000
    41:42.000000
    42:43.000000
    43:44.000000
    44:45.000000
    45:46.000000
    46:47.000000
    47:48.000000
    48:49.000000
    49:50.000000
    50:51.000000
    51:52.000000
    52:53.000000
    53:54.000000
    54:55.000000
    55:56.000000
    56:57.000000
    57:58.000000
    58:59.000000
    59:60.000000
    60:61.000000
    61:62.000000
    62:63.000000
    63:64.000000
    

    另外,这是第二个示例,但现在“data”是一个数组,其中每个“x”的大小可以不同。

    % cat test2.c
    #include <stdio.h>
    #include <stdlib.h>
    
    #define M 16
    
    typedef struct Data_{
      double *x;   /* array of n doubles, allocated by the caller */
      int n;       /* number of elements in x */
    }Data;
    
    void add(Data *data){
    /* Add 1.0 to every element of data->x; data is expected to be present on the device already. */
    #pragma acc parallel loop present(data)
      for (int k = 0; k < data->n; ++k) {
        data->x[k] = data->x[k] + 1.0;
      }
    }
    
    int main (void) {
       /* Array of M structs; element i owns an x array of i+1 doubles. A failed allocation ends the run. */
       Data *data = malloc(sizeof *data * M);
       if (data == NULL) { perror("malloc"); return EXIT_FAILURE; }
    #pragma acc enter data create(data[0:M])
       for (int i =0; i < M; ++i) {
          data[i].n = i+1;
          data[i].x = malloc(sizeof *data[i].x * (size_t) data[i].n);
          if (data[i].x == NULL) { perror("malloc"); return EXIT_FAILURE; }
          for(int j=0; j < data[i].n ; j++){ data[i].x[j] = (double)((i*data[i].n) + j); }
          /* The struct array was only created on the device: push n, then copy in this element's x. */
    #pragma acc update device(data[i].n)
    #pragma acc enter data copyin(data[i].x[0:data[i].n])
       }
    
       for (int i =0; i < M; ++i) {
         add(&data[i]);
       }
    
       for (int i =0; i < M; ++i) {
    #pragma acc update self(data[i].x[:data[i].n])
          for(int j=0; j < data[i].n ; j++){
             printf("%d:%d:%f\n",i,j,data[i].x[j]);
          }
       }
       for (int i =0; i < M; ++i) {
    #pragma acc exit data delete(data[i].x)
          free(data[i].x);
       }
    #pragma acc exit data delete(data)
       free(data);
       return EXIT_SUCCESS;
    }
    % pgcc test2.c -ta=tesla -Minfo=accel; a.out
    add:
         11, Generating present(data[:1])
             Generating Tesla code
             14, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
    main:
         22, Generating enter data create(data[:16])
         32, Generating update device(data->n)
             Generating enter data copyin(data->x[:data->n])
         38, Generating update self(data->x[:data->n])
         46, Generating exit data delete(data->x[:1])
         49, Generating exit data delete(data[:1])
    0:0:1.000000
    1:0:3.000000
    1:1:4.000000
    2:0:7.000000
    2:1:8.000000
    2:2:9.000000
    3:0:13.000000
    3:1:14.000000
    3:2:15.000000
    3:3:16.000000
    4:0:21.000000
    4:1:22.000000
    4:2:23.000000
    4:3:24.000000
    4:4:25.000000
    5:0:31.000000
    5:1:32.000000
    5:2:33.000000
    5:3:34.000000
    5:4:35.000000
    5:5:36.000000
    6:0:43.000000
    6:1:44.000000
    6:2:45.000000
    6:3:46.000000
    6:4:47.000000
    6:5:48.000000
    6:6:49.000000
    7:0:57.000000
    7:1:58.000000
    7:2:59.000000
    7:3:60.000000
    7:4:61.000000
    7:5:62.000000
    7:6:63.000000
    7:7:64.000000
    8:0:73.000000
    8:1:74.000000
    8:2:75.000000
    8:3:76.000000
    8:4:77.000000
    8:5:78.000000
    8:6:79.000000
    8:7:80.000000
    8:8:81.000000
    9:0:91.000000
    9:1:92.000000
    9:2:93.000000
    9:3:94.000000
    9:4:95.000000
    9:5:96.000000
    9:6:97.000000
    9:7:98.000000
    9:8:99.000000
    9:9:100.000000
    10:0:111.000000
    10:1:112.000000
    10:2:113.000000
    10:3:114.000000
    10:4:115.000000
    10:5:116.000000
    10:6:117.000000
    10:7:118.000000
    10:8:119.000000
    10:9:120.000000
    10:10:121.000000
    11:0:133.000000
    11:1:134.000000
    11:2:135.000000
    11:3:136.000000
    11:4:137.000000
    11:5:138.000000
    11:6:139.000000
    11:7:140.000000
    11:8:141.000000
    11:9:142.000000
    11:10:143.000000
    11:11:144.000000
    12:0:157.000000
    12:1:158.000000
    12:2:159.000000
    12:3:160.000000
    12:4:161.000000
    12:5:162.000000
    12:6:163.000000
    12:7:164.000000
    12:8:165.000000
    12:9:166.000000
    12:10:167.000000
    12:11:168.000000
    12:12:169.000000
    13:0:183.000000
    13:1:184.000000
    13:2:185.000000
    13:3:186.000000
    13:4:187.000000
    13:5:188.000000
    13:6:189.000000
    13:7:190.000000
    13:8:191.000000
    13:9:192.000000
    13:10:193.000000
    13:11:194.000000
    13:12:195.000000
    13:13:196.000000
    14:0:211.000000
    14:1:212.000000
    14:2:213.000000
    14:3:214.000000
    14:4:215.000000
    14:5:216.000000
    14:6:217.000000
    14:7:218.000000
    14:8:219.000000
    14:9:220.000000
    14:10:221.000000
    14:11:222.000000
    14:12:223.000000
    14:13:224.000000
    14:14:225.000000
    15:0:241.000000
    15:1:242.000000
    15:2:243.000000
    15:3:244.000000
    15:4:245.000000
    15:5:246.000000
    15:6:247.000000
    15:7:248.000000
    15:8:249.000000
    15:9:250.000000
    15:10:251.000000
    15:11:252.000000
    15:12:253.000000
    15:13:254.000000
    15:14:255.000000
    15:15:256.000000
    

    注意,把带有动态数据成员的结构拷回主机(copyout)时要小心。拷回结构本身,例如问题中的“#pragma acc exit data copyout(data[0:1])”,会用设备地址覆盖主机上“x”的地址。正确的做法是只拷回“data->x”,然后对“data”使用 delete。

    【讨论】:

    • 谢谢,这真的很有帮助!
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2020-08-27
    • 1970-01-01
    • 1970-01-01
    • 2016-12-06
    • 2013-02-12
    • 2013-03-13
    相关资源
    最近更新 更多