【问题标题】:Unexpected distribution of columns when using MPI_Scatter with MPI_Type_vector(将 MPI_Scatter 与 MPI_Type_vector 一起使用时列的意外分布)
【发布时间】:2021-07-25 19:03:05
【问题描述】:

我正在尝试使用 MPI 将一个 2D 矩阵的列划分给 N 个进程。作为模板,我参考了“MPI_Scatter - sending columns of 2D array”这个问题中的示例。

我的代码:

//HEADERS
    /*
     * Allocate an n-by-m board of chars and fill it with random 0/1 cells.
     *
     * All cells live in one contiguous row-major buffer of n*m chars; the
     * returned array holds one pointer per row into that buffer, so b[0]
     * (== *b) addresses the whole board as a flat array.
     *
     * Each cell is set to (rand() < 0.25 * RAND_MAX); rand() is called once
     * per cell in row-major order, so the contents depend only on the state
     * left by the caller's srand().
     *
     * Returns the row-pointer array, or NULL when n or m is negative or an
     * allocation fails. The caller owns the result and releases it with
     * free(b[0]); free(b);
     */
    char** board_initialize(int n, int m)
    {
        int k, l;

        if (n < 0 || m < 0)
            return NULL;

        /* Request at least one byte / one row pointer: malloc(0) may legally
           return NULL, which would be indistinguishable from a failure, and
           callers dereference *b even for an empty board. */
        size_t cells = (size_t)n * (size_t)m;
        char* bd = malloc(cells > 0 ? cells : 1);
        char** b = malloc(sizeof(char*) * (n > 0 ? (size_t)n : 1));
        if (bd == NULL || b == NULL)
        {
            free(bd);
            free(b);
            return NULL;
        }

        /* Row k starts k*m chars into the shared buffer. */
        b[0] = bd;
        for (k = 1; k < n; k++)
            b[k] = &bd[(size_t)k * m];

        for (k = 0; k < n; k++)
            for (l = 0; l < m; l++)
                b[k][l] = rand() < 0.25 * RAND_MAX;

        return b;
    }

    /*
     * Print an n-by-m board to stdout: every cell as a decimal integer with
     * no separator, one text line per row, followed by one empty line.
     */
    void board_print(char** b, int n, int m)
    {
        int row = 0;

        while (row < n)
        {
            int col;

            for (col = 0; col < m; ++col)
            {
                int cell = b[row][col];   /* char promoted to int for %d */
                printf("%d", cell);
            }
            printf("\n");
            ++row;
        }
        printf("\n");
    }

    /*
     * Builds an N x N board on rank 0, scatters N / procs columns to every
     * rank with MPI_Scatter, and prints each rank's local N x mycols board.
     *
     * NOTE(review): the same column_resized type (stride N) describes both the
     * send buffer and the receive buffer, although myboard is allocated with
     * rows of only mycols chars.
     */
    int main(int argc, char* argv[])
    {   
        int N = 10;
        int i, j;

        char * boardptr = NULL;                 // ptr to board
        char ** board;                          // board, 2D matrix, contiguous memory allocation!

        int procs, myid;            
        int mycols;
        char ** myboard;                        // part of board that belongs to a process

        MPI_Init(&argc, &argv);                 // initialization
        
        MPI_Comm_rank(MPI_COMM_WORLD, &myid);   // process ID
        MPI_Comm_size(MPI_COMM_WORLD, &procs);  // number of processes

        // initialize global board (rank 0 only; boardptr stays NULL elsewhere)
        if (myid == 0)
        {
            srand(1573949136);
            board = board_initialize(N, N);
            boardptr = *board;                  // first row pointer, passed to MPI_Scatter as the send buffer
            board_print(board, N, N);
        }
        // divide work (integer division: any remainder columns are not assigned)
        mycols = N / procs;


        // initialize my structures (N rows of mycols chars each)
        myboard = board_initialize(N,mycols);

        // one column of the N x N board: N blocks of 1 char, stride N chars
        MPI_Datatype column_not_resized, column_resized;
        MPI_Type_vector(N, 1, N, MPI_CHAR, &column_not_resized);
        MPI_Type_commit(&column_not_resized);
        // set the extent to one char so consecutive columns start one char apart
        MPI_Type_create_resized(column_not_resized, 0, 1*sizeof(char), &column_resized);
        MPI_Type_commit(&column_resized);

        // scatter initial matrix (same datatype on the send and receive side)
        MPI_Scatter(boardptr, mycols, column_resized, *myboard, mycols, column_resized, 0, MPI_COMM_WORLD);
        MPI_Barrier(MPI_COMM_WORLD);

        board_print(myboard, N, mycols);
        
        MPI_Finalize();         // finalize MPI

        return 0;
    }

整个棋盘是这样的:

0000010010
0100000000
0000101100
0101000010
1000000100
0000010010
0000001110
0110000100
0000100000
0100010010

如果我使用 2 个进程,我希望每个进程将得到一半(第一个进程列 1-5 和第二个进程列 6-10)。但是,如果我打印两个进程的 myboard,我会得到一些奇怪的结果:

proc0:       proc1:
0 0 0 0 0    1 0 0 1 0 
0 0 1 0 0    0 0 0 1 1 
0 1 0 0 0    0 0 0 0 0 
0 1 0 0 0    0 1 0 1 0 
0 0 0 0 1    0 1 1 0 0 
0 1 1 0 0    0 0 1 0 0 
0 1 0 1 0    0 0 0 1 0 
0 0 0 0 1    0 0 1 0 0 
1 0 0 0 0    0 0 1 0 0 
0 1 0 0 0    0 1 0 0 0 

这可能是一个愚蠢的错误,但我似乎无法找到它。任何帮助将非常感激。

注意:proc1 的输出可能只是一些垃圾,因为我每次运行都会得到不同的输出。

【问题讨论】:

    标签: c performance parallel-processing mpi scatter-matrix


    【解决方案1】:

    您忘记区分发送和接收类型,即MPI_Type_vector 的参数将取决于适当的类型。您需要执行以下操作:

    MPI_Datatype acol, acoltype, bcol, bcoltype;
    if (myid == 0) {
        MPI_Type_vector(N, 1, N, MPI_CHAR, &acol);
        MPI_Type_commit(&acol);
        MPI_Type_create_resized(acol, 0, 1*sizeof(char), &acoltype);
    }
    MPI_Type_vector(N, 1, mycols, MPI_CHAR, &bcol);
    MPI_Type_commit(&bcol);
    MPI_Type_create_resized(bcol, 0, 1*sizeof(char), &bcoltype);
    MPI_Type_commit(&bcoltype);
    MPI_Scatter (boardptr, mycols, acoltype, *myboard, mycols, bcoltype, 0, MPI_COMM_WORLD);
    

    从发送方的角度来看,您要创建 N 个块(即 N 行),每个块的大小为 1,步长为 N(即 N 列)。因此:

    // count = N blocks, blocklength = 1 element, stride = N elements between block starts
    MPI_Type_vector(N, 1, N, MPI_CHAR, &acol);
    

    从接收方的角度来看,您要创建 N 个块(即 N 行),每个块的大小为 1,步长为 mycols(即 mycols 列)。因此:

    // count = N blocks, blocklength = 1 element, stride = mycols elements between block starts
    MPI_Type_vector(N, 1, mycols, MPI_CHAR, &bcol);
    

    旁注:您其实并不需要在 MPI_Scatter 之后调用 MPI_Barrier,因为 MPI_Scatter 本身已经是一个阻塞调用。

    最终代码如下所示:

    #include <stdio.h>
    #include <stdlib.h>
    #include <mpi.h>
    
        /*
         * Allocate an n-by-m board of chars and fill it with random 0/1 cells.
         *
         * All cells live in one contiguous row-major buffer of n*m chars; the
         * returned array holds one pointer per row into that buffer, so b[0]
         * (== *b) addresses the whole board as a flat array.
         *
         * Each cell is set to (rand() < 0.25 * RAND_MAX); rand() is called
         * once per cell in row-major order, so the contents depend only on
         * the state left by the caller's srand().
         *
         * Returns the row-pointer array, or NULL when n or m is negative or
         * an allocation fails. The caller owns the result and releases it
         * with free(b[0]); free(b);
         */
        char** board_initialize(int n, int m)
        {
            int k, l;

            if (n < 0 || m < 0)
                return NULL;

            /* Request at least one byte / one row pointer: malloc(0) may
               legally return NULL, which would be indistinguishable from a
               failure, and callers dereference *b even for an empty board. */
            size_t cells = (size_t)n * (size_t)m;
            char* bd = malloc(cells > 0 ? cells : 1);
            char** b = malloc(sizeof(char*) * (n > 0 ? (size_t)n : 1));
            if (bd == NULL || b == NULL)
            {
                free(bd);
                free(b);
                return NULL;
            }

            /* Row k starts k*m chars into the shared buffer. */
            b[0] = bd;
            for (k = 1; k < n; k++)
                b[k] = &bd[(size_t)k * m];

            for (k = 0; k < n; k++)
                for (l = 0; l < m; l++)
                    b[k][l] = rand() < 0.25 * RAND_MAX;

            return b;
        }
    
        /*
         * Write an n-by-m board to stdout: each cell as a decimal integer
         * with no separator, one line per row, then one trailing empty line.
         */
        void board_print(char** b, int n, int m)
        {
            int row = 0;

            while (row < n)
            {
                int col;

                for (col = 0; col < m; ++col)
                {
                    int cell = b[row][col];   /* char promoted to int for %d */
                    printf("%d", cell);
                }
                printf("\n");
                ++row;
            }
            printf("\n");
        }
    
    
       int main(int argc, char* argv[])
        {   
            int N = 10;
            int i, j;
    
            char * boardptr = NULL;                 // ptr to board
            char ** board;                          // board, 2D matrix, contignous memory allocation!
    
            int procs, myid;            
            int mycols;
            char ** myboard;                        // part of board that belongs to a process
    
            MPI_Init(&argc, &argv);                 // initiailzation
            
            MPI_Comm_rank(MPI_COMM_WORLD, &myid);   // process ID
            MPI_Comm_size(MPI_COMM_WORLD, &procs);  // number of processes
    
            // initialize global board
            if (myid == 0)
            {
                srand(1573949136);
                board = board_initialize(N, N);
                boardptr = *board;
                board_print(board, N, N);
            }
            // divide work
            mycols = N / procs;
    
    
            // initialize my structures
            myboard = board_initialize(N,mycols);
        MPI_Datatype acol, acoltype, bcol, bcoltype;
    
        if (myid == 0) {
                MPI_Type_vector(N,    
                    1,                  
                    N,         
                    MPI_CHAR,       
                    &acol);       
                    MPI_Type_commit(&acol);
                MPI_Type_create_resized(acol, 0, 1*sizeof(char), &acoltype);
        }
        MPI_Type_vector(N,    
                   1,                  
                   mycols,         
                   MPI_CHAR,       
                   &bcol);       
    
        MPI_Type_commit(&bcol);
        MPI_Type_create_resized(bcol, 0, 1*sizeof(char), &bcoltype);
        MPI_Type_commit(&bcoltype);
        
    
        MPI_Scatter (boardptr, mycols, acoltype, *myboard, mycols, bcoltype, 0, MPI_COMM_WORLD);
            board_print(myboard, N, mycols);
            
            MPI_Finalize();         // finalize MPI
    
            return 0;
        }
    

    另外,我认为(IMO)按行划分比按列划分更容易,代码如下所示:

    // Row-wise split: every rank gets myrows complete rows of N chars each.
    int myrows = N / procs;
    myboard = board_initialize(myrows, N);
    // Rows are contiguous in memory, so no derived datatype is needed; the
    // per-rank count is the number of chars in myrows rows, i.e. myrows * N.
    MPI_Scatter (boardptr, myrows * N, MPI_CHAR, *myboard, myrows * N, MPI_CHAR, 0, MPI_COMM_WORLD);
    board_print(myboard, myrows, N);
    

    【讨论】:

      猜你喜欢
      • 2017-07-09
      • 1970-01-01
      • 2021-01-30
      • 2017-04-29
      • 2021-03-21
      • 2014-07-12
      • 2015-06-07
      • 1970-01-01
      • 2015-06-02
      相关资源
      最近更新 更多