Skip to content

Commit

Permalink
In EXAMPLE/ global interface drivers, ask P0 to generate random Xtrue a…
Browse files Browse the repository at this point in the history
…nd RHS, then broadcast to

every other process
  • Loading branch information
xiaoyeli committed Nov 2, 2022
1 parent 324d65f commit 3859112
Show file tree
Hide file tree
Showing 17 changed files with 239 additions and 77 deletions.
14 changes: 12 additions & 2 deletions EXAMPLE/pddrive1_ABglobal.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,18 @@ int main(int argc, char *argv[])
*trans = 'N';
ldx = n;
ldb = m;
dGenXtrue_dist(n, nrhs, xtrue, ldx);
dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

if ( iam==0 ) {
dGenXtrue_dist(n, nrhs, xtrue, ldx);
dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

MPI_Bcast( xtrue, n*nrhs, MPI_DOUBLE, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_DOUBLE, 0, grid.comm );
} else {
MPI_Bcast( xtrue, n*nrhs, MPI_DOUBLE, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_DOUBLE, 0, grid.comm );
}

for (j = 0; j < nrhs; ++j)
for (i = 0; i < m; ++i) b1[i+j*ldb] = b[i+j*ldb];

Expand Down
13 changes: 11 additions & 2 deletions EXAMPLE/pddrive2_ABglobal.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,17 @@ int main(int argc, char *argv[])
*trans = 'N';
ldx = n;
ldb = m;
dGenXtrue_dist(n, nrhs, xtrue, ldx);
dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

if ( iam==0 ) {
dGenXtrue_dist(n, nrhs, xtrue, ldx);
dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

MPI_Bcast( xtrue, n*nrhs, MPI_DOUBLE, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_DOUBLE, 0, grid.comm );
} else {
MPI_Bcast( xtrue, n*nrhs, MPI_DOUBLE, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_DOUBLE, 0, grid.comm );
}

/* Save a copy of the right-hand side. */
if ( !(b1 = doubleMalloc_dist(m * nrhs)) ) ABORT("Malloc fails for b1[]");
Expand Down
13 changes: 11 additions & 2 deletions EXAMPLE/pddrive3_ABglobal.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,17 @@ int main(int argc, char *argv[])
*trans = 'N';
ldx = n;
ldb = m;
dGenXtrue_dist(n, nrhs, xtrue, ldx);
dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

if ( iam==0 ) {
dGenXtrue_dist(n, nrhs, xtrue, ldx);
dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

MPI_Bcast( xtrue, n*nrhs, MPI_DOUBLE, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_DOUBLE, 0, grid.comm );
} else {
MPI_Bcast( xtrue, n*nrhs, MPI_DOUBLE, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_DOUBLE, 0, grid.comm );
}

/* Save a copy of the right-hand side. */
if ( !(b1 = doubleMalloc_dist(m * nrhs)) ) ABORT("Malloc fails for b1[]");
Expand Down
25 changes: 21 additions & 4 deletions EXAMPLE/pddrive4_ABglobal.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,17 @@ int main(int argc, char *argv[])
*trans = 'N';
ldx = n;
ldb = m;
dGenXtrue_dist(n, nrhs, xtrue, ldx);
dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

if ( iam==0 ) {
dGenXtrue_dist(n, nrhs, xtrue, ldx);
dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

MPI_Bcast( xtrue, n*nrhs, MPI_DOUBLE, 0, grid1.comm );
MPI_Bcast( b, m*nrhs, MPI_DOUBLE, 0, grid1.comm );
} else {
MPI_Bcast( xtrue, n*nrhs, MPI_DOUBLE, 0, grid1.comm );
MPI_Bcast( b, m*nrhs, MPI_DOUBLE, 0, grid1.comm );
}

if ( !(berr = doubleMalloc_dist(nrhs)) )
ABORT("Malloc fails for berr[].");
Expand Down Expand Up @@ -282,8 +291,16 @@ int main(int argc, char *argv[])
*trans = 'N';
ldx = n;
ldb = m;
dGenXtrue_dist(n, nrhs, xtrue, ldx);
dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

if ( iam==0 ) {
dGenXtrue_dist(n, nrhs, xtrue, ldx);
dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);
MPI_Bcast( xtrue, n*nrhs, MPI_DOUBLE, 0, grid2.comm );
MPI_Bcast( b, m*nrhs, MPI_DOUBLE, 0, grid2.comm );
} else {
MPI_Bcast( xtrue, n*nrhs, MPI_DOUBLE, 0, grid2.comm );
MPI_Bcast( b, m*nrhs, MPI_DOUBLE, 0, grid2.comm );
}

if ( !(berr = doubleMalloc_dist(nrhs)) )
ABORT("Malloc fails for berr[].");
Expand Down
13 changes: 11 additions & 2 deletions EXAMPLE/pddrive_ABglobal.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,17 @@ int main(int argc, char *argv[])
*trans = 'N';
ldx = n;
ldb = m;
dGenXtrue_dist(n, nrhs, xtrue, ldx);
dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

if ( iam==0 ) {
dGenXtrue_dist(n, nrhs, xtrue, ldx);
dFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

MPI_Bcast( xtrue, n*nrhs, MPI_DOUBLE, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_DOUBLE, 0, grid.comm );
} else {
MPI_Bcast( xtrue, n*nrhs, MPI_DOUBLE, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_DOUBLE, 0, grid.comm );
}

if ( !(berr = doubleMalloc_dist(nrhs)) )
ABORT("Malloc fails for berr[].");
Expand Down
22 changes: 11 additions & 11 deletions EXAMPLE/psdrive.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,22 +162,22 @@ int main(int argc, char *argv[])
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");

if (ttemp) {
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices
gpuSetDevice(rank % devs); // Set device to be used for GPU executions
}

// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
}
#endif
// printf("grid.iam %5d, myrank %5d\n",grid.iam,myrank);
// fflush(stdout);
Expand All @@ -192,11 +192,11 @@ int main(int argc, char *argv[])
int superlu_acc_offload = get_acc_offload();
if (superlu_acc_offload) {
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
double t1 = SuperLU_timer_();
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
Expand Down Expand Up @@ -359,7 +359,7 @@ int main(int argc, char *argv[])
MPI_Allreduce(MPI_IN_PLACE, result_min, 2, MPI_FLOAT,MPI_MIN, MPI_COMM_WORLD);
MPI_Allreduce(MPI_IN_PLACE, result_max, 2, MPI_FLOAT,MPI_MAX, MPI_COMM_WORLD);
if (!myrank) {
printf("returning data:\n");
printf("Batch solves returning data:\n");
printf(" Factor time over all grids. Min: %8.4f Max: %8.4f\n",result_min[0], result_max[0]);
printf(" Solve time over all grids. Min: %8.4f Max: %8.4f\n",result_min[1], result_max[1]);
printf("**************************************************\n");
Expand Down
14 changes: 12 additions & 2 deletions EXAMPLE/psdrive1_ABglobal.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,18 @@ int main(int argc, char *argv[])
*trans = 'N';
ldx = n;
ldb = m;
sGenXtrue_dist(n, nrhs, xtrue, ldx);
sFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

if ( iam==0 ) {
sGenXtrue_dist(n, nrhs, xtrue, ldx);
sFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

MPI_Bcast( xtrue, n*nrhs, MPI_FLOAT, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_FLOAT, 0, grid.comm );
} else {
MPI_Bcast( xtrue, n*nrhs, MPI_FLOAT, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_FLOAT, 0, grid.comm );
}

for (j = 0; j < nrhs; ++j)
for (i = 0; i < m; ++i) b1[i+j*ldb] = b[i+j*ldb];

Expand Down
13 changes: 11 additions & 2 deletions EXAMPLE/psdrive2_ABglobal.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,17 @@ int main(int argc, char *argv[])
*trans = 'N';
ldx = n;
ldb = m;
sGenXtrue_dist(n, nrhs, xtrue, ldx);
sFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

if ( iam==0 ) {
sGenXtrue_dist(n, nrhs, xtrue, ldx);
sFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

MPI_Bcast( xtrue, n*nrhs, MPI_FLOAT, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_FLOAT, 0, grid.comm );
} else {
MPI_Bcast( xtrue, n*nrhs, MPI_FLOAT, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_FLOAT, 0, grid.comm );
}

/* Save a copy of the right-hand side. */
if ( !(b1 = floatMalloc_dist(m * nrhs)) ) ABORT("Malloc fails for b1[]");
Expand Down
13 changes: 11 additions & 2 deletions EXAMPLE/psdrive3_ABglobal.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,17 @@ int main(int argc, char *argv[])
*trans = 'N';
ldx = n;
ldb = m;
sGenXtrue_dist(n, nrhs, xtrue, ldx);
sFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

if ( iam==0 ) {
sGenXtrue_dist(n, nrhs, xtrue, ldx);
sFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

MPI_Bcast( xtrue, n*nrhs, MPI_FLOAT, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_FLOAT, 0, grid.comm );
} else {
MPI_Bcast( xtrue, n*nrhs, MPI_FLOAT, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_FLOAT, 0, grid.comm );
}

/* Save a copy of the right-hand side. */
if ( !(b1 = floatMalloc_dist(m * nrhs)) ) ABORT("Malloc fails for b1[]");
Expand Down
25 changes: 21 additions & 4 deletions EXAMPLE/psdrive4_ABglobal.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,17 @@ int main(int argc, char *argv[])
*trans = 'N';
ldx = n;
ldb = m;
sGenXtrue_dist(n, nrhs, xtrue, ldx);
sFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

if ( iam==0 ) {
sGenXtrue_dist(n, nrhs, xtrue, ldx);
sFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

MPI_Bcast( xtrue, n*nrhs, MPI_FLOAT, 0, grid1.comm );
MPI_Bcast( b, m*nrhs, MPI_FLOAT, 0, grid1.comm );
} else {
MPI_Bcast( xtrue, n*nrhs, MPI_FLOAT, 0, grid1.comm );
MPI_Bcast( b, m*nrhs, MPI_FLOAT, 0, grid1.comm );
}

if ( !(berr = floatMalloc_dist(nrhs)) )
ABORT("Malloc fails for berr[].");
Expand Down Expand Up @@ -282,8 +291,16 @@ int main(int argc, char *argv[])
*trans = 'N';
ldx = n;
ldb = m;
sGenXtrue_dist(n, nrhs, xtrue, ldx);
sFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

if ( iam==0 ) {
sGenXtrue_dist(n, nrhs, xtrue, ldx);
sFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);
MPI_Bcast( xtrue, n*nrhs, MPI_FLOAT, 0, grid2.comm );
MPI_Bcast( b, m*nrhs, MPI_FLOAT, 0, grid2.comm );
} else {
MPI_Bcast( xtrue, n*nrhs, MPI_FLOAT, 0, grid2.comm );
MPI_Bcast( b, m*nrhs, MPI_FLOAT, 0, grid2.comm );
}

if ( !(berr = floatMalloc_dist(nrhs)) )
ABORT("Malloc fails for berr[].");
Expand Down
13 changes: 11 additions & 2 deletions EXAMPLE/psdrive_ABglobal.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,17 @@ int main(int argc, char *argv[])
*trans = 'N';
ldx = n;
ldb = m;
sGenXtrue_dist(n, nrhs, xtrue, ldx);
sFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

if ( iam==0 ) {
sGenXtrue_dist(n, nrhs, xtrue, ldx);
sFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

MPI_Bcast( xtrue, n*nrhs, MPI_FLOAT, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_FLOAT, 0, grid.comm );
} else {
MPI_Bcast( xtrue, n*nrhs, MPI_FLOAT, 0, grid.comm );
MPI_Bcast( b, m*nrhs, MPI_FLOAT, 0, grid.comm );
}

if ( !(berr = floatMalloc_dist(nrhs)) )
ABORT("Malloc fails for berr[].");
Expand Down
60 changes: 30 additions & 30 deletions EXAMPLE/pzdrive.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,27 +156,27 @@ int main(int argc, char *argv[])
#ifdef GPU_ACC
int superlu_acc_offload = get_acc_offload();
if (superlu_acc_offload) {
/* Binding each MPI to a GPU device */
char *ttemp;
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");

if (ttemp) {
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices
gpuSetDevice(rank % devs); // Set device to be used for GPU executions
}
/* Binding each MPI to a GPU device */
char *ttemp;
ttemp = getenv ("SUPERLU_BIND_MPI_GPU");

if (ttemp) {
int devs, rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank); // MPI_COMM_WORLD needs to be used here instead of SubComm
gpuGetDeviceCount(&devs); // Returns the number of compute-capable devices
gpuSetDevice(rank % devs); // Set device to be used for GPU executions
}

// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
// This is to initialize GPU, which can be costly.
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
#endif
// printf("grid.iam %5d, myrank %5d\n",grid.iam,myrank);
// fflush(stdout);
Expand All @@ -190,15 +190,15 @@ int main(int argc, char *argv[])
#ifdef GPU_ACC
int superlu_acc_offload = get_acc_offload();
if (superlu_acc_offload) {
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
double t1 = SuperLU_timer_();
gpuFree(0);
double t2 = SuperLU_timer_();
if(!myrank)printf("first gpufree time: %7.4f\n",t2-t1);
gpublasHandle_t hb;
gpublasCreate(&hb);
if(!myrank)printf("first blas create time: %7.4f\n",SuperLU_timer_()-t2);
gpublasDestroy(hb);
}
#endif
}
Expand Down Expand Up @@ -357,7 +357,7 @@ int main(int argc, char *argv[])
MPI_Allreduce(MPI_IN_PLACE, result_min, 2, MPI_FLOAT,MPI_MIN, MPI_COMM_WORLD);
MPI_Allreduce(MPI_IN_PLACE, result_max, 2, MPI_FLOAT,MPI_MAX, MPI_COMM_WORLD);
if (!myrank) {
printf("returning data:\n");
printf("Batch solves returning data:\n");
printf(" Factor time over all grids. Min: %8.4f Max: %8.4f\n",result_min[0], result_max[0]);
printf(" Solve time over all grids. Min: %8.4f Max: %8.4f\n",result_min[1], result_max[1]);
printf("**************************************************\n");
Expand Down
14 changes: 12 additions & 2 deletions EXAMPLE/pzdrive1_ABglobal.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,18 @@ int main(int argc, char *argv[])
*trans = 'N';
ldx = n;
ldb = m;
zGenXtrue_dist(n, nrhs, xtrue, ldx);
zFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

if ( iam==0 ) {
zGenXtrue_dist(n, nrhs, xtrue, ldx);
zFillRHS_dist(trans, nrhs, xtrue, ldx, &A, b, ldb);

MPI_Bcast( xtrue, n*nrhs, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid.comm );
MPI_Bcast( b, m*nrhs, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid.comm );
} else {
MPI_Bcast( xtrue, n*nrhs, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid.comm );
MPI_Bcast( b, m*nrhs, SuperLU_MPI_DOUBLE_COMPLEX, 0, grid.comm );
}

for (j = 0; j < nrhs; ++j)
for (i = 0; i < m; ++i) b1[i+j*ldb] = b[i+j*ldb];

Expand Down
Loading

0 comments on commit 3859112

Please sign in to comment.