【发布时间】:2015-02-16 20:10:24
【问题描述】:
我在使用 OpenMPI 编译并运行这段代码时遇到了问题。我刚接触 OpenMPI,对相关概念还不太熟悉,如果有人能指出我错在哪里,那就太好了。编译没有问题,但运行程序时,我会收到以下消息:
mpirun was unable to find the specified executable file, and therefore
did not launch the job. This error was first reported for process
rank 0; it may have occurred for other processes as well.
NOTE: A common cause for this error is misspelling a mpirun command
line parameter option (remember that mpirun interprets the first
unrecognized command line token as the executable).
我正在编译使用:
mpic++ matmult.cpp -o matmult
并运行它:
mpirun -n 2 matmult
...这里是使用的代码:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#define MASTER 0
#define FROM_MASTER 1
#define FROM_WORKER 2
// ---------------------------------------------------------------------------
// Allocate a zero-initialised row x col matrix of floats.
//
// All elements live in one contiguous buffer; a separate table of row
// pointers is laid on top of it. An element can therefore be addressed as
//   - A[r][c]  with r < row and c < col, or
//   - A[0][i]  with i < row*col (row-major order).
// The contiguous layout also means several consecutive rows can be handed
// to a routine that expects one flat buffer.
//
// Returns NULL if row or col is not positive, if row*col does not fit in
// size_t, or if either allocation fails.
// The caller owns the result and releases it with: free(A[0]); free(A);
float **alloc_mat(int row, int col)
{
    if (row <= 0 || col <= 0)
        return NULL;

    const size_t nrows = (size_t)row;
    const size_t ncols = (size_t)col;

    // Reject sizes whose element count would wrap around in size_t.
    if (ncols > (size_t)-1 / nrows)
        return NULL;

    // The casts are kept on purpose: the file is built with a C++ compiler
    // (mpic++), which has no implicit conversion from void *.
    float **row_ptrs = (float **)calloc(nrows, sizeof *row_ptrs);
    if (row_ptrs == NULL)
        return NULL;

    float *cells = (float *)calloc(nrows * ncols, sizeof *cells);
    if (cells == NULL) {
        free(row_ptrs);
        return NULL;
    }

    // Point every row at its slice of the contiguous element buffer.
    for (size_t r = 0; r < nrows; r++)
        row_ptrs[r] = cells + r * ncols;

    return row_ptrs;
}
// ---------------------------------------------------------------------------
// Fill a matrix with pseudo-random whole numbers in the range [0..9].
//
// The row*col elements are written through A[0] as one flat run, so the
// element storage must be contiguous starting at A[0] (the layout that
// alloc_mat produces). rand() is called once per element, in storage order.
// Nothing is touched when row*col is not positive.
void init_mat(float **A, int row, int col)
{
    int remaining = row * col;
    if (remaining <= 0)
        return;

    float *cell = A[0];
    while (remaining-- > 0)
        *cell++ = (float)(rand() % 10);
}
// ---------------------------------------------------------------------------
// DEBUG FUNCTION: print every element of a matrix to stdout.
//
// Writes the header line "Matrix <tag>:" followed by one output line per
// matrix row, each element formatted as "%6.1f ".
//
//   A    matrix, read as A[i][j] with i < row and j < col
//   row  number of rows to print
//   col  number of columns to print
//   tag  label shown in the header; const so that string literals can be
//        passed without a deprecated conversion when built as C++
void print_mat(float **A, int row, int col, const char *tag)
{
    printf("Matrix %s:\n", tag);
    for (int i = 0; i < row; i++) {
        for (int j = 0; j < col; j++)
            printf("%6.1f ", A[i][j]);
        printf("\n");
    }
}
// ---------------------------------------------------------------------------
int main(int argc, char *argv[]) {
int numtasks;
int taskid;
int numworkers;
int source;
int dest;
int mtype;
int rows;
int averow, extra, offset;
double starttime, endtime;
float **A, **B, **C; // matrices
int d1, d2, d3; // dimensions of matrices
int i, j, k, rc; // loop variables
MPI_Status status;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
if (argc != 4) {
printf ("Matrix multiplication: C = A x B\n");
printf ("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]);
return 0;
}
if (numtasks < 2 ) {
printf("Need at least two MPI tasks. Quitting...\n");
MPI_Abort(MPI_COMM_WORLD,rc);
exit(1);
}
/* read user input */
d1 = atoi(argv[1]); // rows of A and C d1
d2 = atoi(argv[2]); // cols of A and rows of B d2
d3 = atoi(argv[3]); // cols of B and C d3
printf("Matrix sizes C[%d][%d] = A[%d][%d] x B[%d][%d]\n", d1, d3, d1, d2, d2, d3);
/* prepare matrices */
A = alloc_mat(d1, d2);
init_mat(A, d1, d2);
B = alloc_mat(d2, d3);
init_mat(B, d2, d3);
C = alloc_mat(d1, d3);
/* Code für den Manager */
if (taskid == MASTER) {
/*printf("matrix multiplikation withMPI\n");
printf("initializing arrays ...\n");
for (i=0; i<d1; i++)
for (j=0; j<d2; j++)
A[i][j]=i+j;
for (i=0; i<d2; i++)
for (j=0; j<d3; j++)
B[i][j]=i*j;*/
/* Matrizen versenden */
averow = d1/numworkers;
extra = d1%numworkers;
offset = 0;
mtype = FROM_MASTER;
starttime=MPI_Wtime();
for (dest=1;dest<=numworkers;dest++) {
rows = (dest <= extra) ? averow+1 :averow;
printf("Sending %drows to task %doffset=%d\n",rows,dest,offset);
MPI_Send(&offset, 1, MPI_INT,dest,mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT,dest,mtype, MPI_COMM_WORLD);
MPI_Send(&A[offset][0],rows*d2, MPI_DOUBLE,dest,mtype, MPI_COMM_WORLD);
MPI_Send(&B, d2*d3, MPI_DOUBLE,dest,mtype, MPI_COMM_WORLD);
offset =offset+rows;
}
/* Ergebnisse empfangen */
mtype = FROM_WORKER;
for (i=1; i<=numworkers; i++) {
source = i;
MPI_Recv(&offset, 1, MPI_INT,source,mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT,source,mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&C[offset][0],rows*d3,
MPI_DOUBLE,source,mtype,MPI_COMM_WORLD,&status);
printf("Received results from task %d\n",source);
}
endtime=MPI_Wtime();
printf("\nIt took %fseconds.\n",endtime-starttime);
}
/* Code für die Arbeiter */
if (taskid > MASTER) {
mtype = FROM_MASTER;
MPI_Recv(&offset, 1, MPI_INT, MASTER,mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&d1, 1, MPI_INT, MASTER,mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&A,rows*d2, MPI_DOUBLE, MASTER,mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&B, d2*d3, MPI_DOUBLE, MASTER,mtype, MPI_COMM_WORLD, &status);
/* print user instruction */
// no initialisation of C, because it gets filled by matmult
/* serial version of matmult */
printf("Perform matrix multiplication...\n");
for (i = 0; i < d1; i++)
for (j = 0; j < d3; j++)
for (k = 0; k < d2; k++)
C[i][j] += A[i][k] * B[k][j];
mtype = FROM_WORKER;
MPI_Send(&offset, 1, MPI_INT, MASTER,mtype, MPI_COMM_WORLD);
MPI_Send(&d1, 1, MPI_INT, MASTER,mtype, MPI_COMM_WORLD);
MPI_Send(&C,rows*d3, MPI_DOUBLE, MASTER,mtype, MPI_COMM_WORLD);
}
MPI_Finalize();
/* test output
print_mat(A, d1, d2, "A");
print_mat(B, d2, d3, "B");
print_mat(C, d1, d3, "C"); */
printf ("\nDone.\n");
//return 0;
}
运行结果mpirun matmult(默认设置,单进程):
mpirun 已退出,因为节点 juliuss-mbp-3 上 rank 为 0、PID 为 77202 的进程未正常退出。这可能由以下三种原因造成:
1. 此进程在退出前没有调用“init”,但作业中的其他进程调用了。这可能导致作业在等待所有进程调用“init”时无限期挂起。按照规则,如果有一个进程调用了“init”,那么所有进程都必须在终止之前调用“init”。
2. 此进程调用了“init”,但没有调用“finalize”就退出了。按照规则,所有调用了“init”的进程都必须在退出之前调用“finalize”,否则将被视为“异常终止”。
3. 此进程调用了“MPI_Abort”或“orte_abort”,并且 mca 参数 orte_create_session_dirs 被设置为 false。在这种情况下,运行时无法检测到该中止调用属于异常终止,因此您收到的唯一错误消息就是这一条。这可能导致应用程序中的其他进程被 mpirun 发送的信号终止(如此处所报告)。您可以在 mpirun 命令行上指定 -quiet 来避免此消息。
【问题讨论】:
-
mpirun -n 2 matmult:尝试在 n 之后加上 p,例如:mpirun -np 2 matmult -
很遗憾,我试过这个方法,得到的还是相同的消息。@JonnyHenly
-
当你运行
mpic++ matmult.cpp -o matmult时,程序真的编译了吗?你试过用g++编译吗? -
它使用 mpic++ 编译得很好。使用 g++ 我遇到了与 OpenMPI 的链接问题。@JonnyHenly
-
我知道这听起来很简单,但请确保您与可执行文件位于同一目录中。也尝试运行
mpirun matmult(默认设置,单进程),看看错误是否仍然存在。