Skip to content

Commit

Permalink
more changes to insure each triplet IJK computed exactly once
Browse files Browse the repository at this point in the history
  • Loading branch information
Steven J. Plimpton committed Aug 27, 2018
1 parent c01fb5f commit 31ed3f7
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 65 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Created orthogonal box = (0 0 0) to (18.3252 18.3252 18.3252)
1 by 1 by 1 MPI processor grid
create_atoms 1 box
Created 4000 atoms
Time spent = 0.00126314 secs
Time spent = 0.00139618 secs

pair_style hybrid/overlay lj/cut 4.5 atm 4.5 2.5
pair_coeff * * lj/cut 1.0 1.0
Expand Down Expand Up @@ -60,26 +60,26 @@ Neighbor list info ...
bin: standard
Per MPI rank memory allocation (min/avg/max) = 11.47 | 11.47 | 11.47 Mbytes
Step Temp E_pair E_mol TotEng Press
0 1.033 -4.8899813 0 -3.3408686 -4.2298176
5 1.0337853 -4.8928208 0 -3.3425304 -4.2233154
10 1.0358056 -4.8953304 0 -3.3420104 -4.1897183
15 1.0380938 -4.8990457 0 -3.3422942 -4.1310148
20 1.0389566 -4.9014345 0 -3.3433892 -4.0406616
25 1.0358313 -4.8989663 0 -3.3456079 -3.9093019
Loop time of 12.2062 on 1 procs for 25 steps with 4000 atoms
0 1.033 -4.8404387 0 -3.291326 -4.1332095
5 1.0337247 -4.8402263 0 -3.290027 -4.1207962
10 1.0355935 -4.8425889 0 -3.2895869 -4.0870158
15 1.0376519 -4.84599 0 -3.2899013 -4.0278711
20 1.0382257 -4.8478854 0 -3.2909361 -3.9368052
25 1.0347886 -4.84473 0 -3.2929351 -3.8044469
Loop time of 15.95 on 1 procs for 25 steps with 4000 atoms

Performance: 353.920 tau/day, 2.048 timesteps/s
99.9% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 270.846 tau/day, 1.567 timesteps/s
100.0% CPU use with 1 MPI tasks x no OpenMP threads

MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 12.202 | 12.202 | 12.202 | 0.0 | 99.96
Pair | 15.946 | 15.946 | 15.946 | 0.0 | 99.97
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.0015621 | 0.0015621 | 0.0015621 | 0.0 | 0.01
Output | 0.00020814 | 0.00020814 | 0.00020814 | 0.0 | 0.00
Modify | 0.0019698 | 0.0019698 | 0.0019698 | 0.0 | 0.02
Other | | 0.0007734 | | | 0.01
Comm | 0.0015042 | 0.0015042 | 0.0015042 | 0.0 | 0.01
Output | 0.00013781 | 0.00013781 | 0.00013781 | 0.0 | 0.00
Modify | 0.0017776 | 0.0017776 | 0.0017776 | 0.0 | 0.01
Other | | 0.0006771 | | | 0.00

Nlocal: 4000 ave 4000 max 4000 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Expand All @@ -97,4 +97,4 @@ Dangerous builds = 0

Please see the log.cite file for references relevant to this simulation

Total wall time: 0:00:13
Total wall time: 0:00:16
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Created orthogonal box = (0 0 0) to (18.3252 18.3252 18.3252)
1 by 2 by 2 MPI processor grid
create_atoms 1 box
Created 4000 atoms
Time spent = 0.000785112 secs
Time spent = 0.000900984 secs

pair_style hybrid/overlay lj/cut 4.5 atm 4.5 2.5
pair_coeff * * lj/cut 1.0 1.0
Expand Down Expand Up @@ -60,26 +60,26 @@ Neighbor list info ...
bin: standard
Per MPI rank memory allocation (min/avg/max) = 5.532 | 5.532 | 5.532 Mbytes
Step Temp E_pair E_mol TotEng Press
0 1.033 -4.8921547 0 -3.343042 -4.2340557
5 1.0337949 -4.8947881 0 -3.3444835 -4.2271456
10 1.0358286 -4.8973178 0 -3.3439632 -4.1935779
15 1.0381322 -4.9010593 0 -3.3442503 -4.134913
20 1.0390107 -4.9034854 0 -3.3453589 -4.0446162
25 1.0358988 -4.9010506 0 -3.3475908 -3.9133006
Loop time of 3.20632 on 4 procs for 25 steps with 4000 atoms
0 1.033 -4.8404387 0 -3.291326 -4.1332095
5 1.0337247 -4.8402263 0 -3.290027 -4.1207962
10 1.0355935 -4.8425889 0 -3.2895869 -4.0870158
15 1.0376519 -4.84599 0 -3.2899013 -4.0278711
20 1.0382257 -4.8478854 0 -3.2909361 -3.9368052
25 1.0347886 -4.84473 0 -3.2929351 -3.8044469
Loop time of 4.34636 on 4 procs for 25 steps with 4000 atoms

Performance: 1347.340 tau/day, 7.797 timesteps/s
100.0% CPU use with 4 MPI tasks x no OpenMP threads
Performance: 993.935 tau/day, 5.752 timesteps/s
99.6% CPU use with 4 MPI tasks x no OpenMP threads

MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 3.1207 | 3.1553 | 3.1859 | 1.5 | 98.41
Pair | 3.9977 | 4.1036 | 4.209 | 4.9 | 94.41
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.019466 | 0.05009 | 0.084602 | 12.0 | 1.56
Output | 7.1049e-05 | 8.2076e-05 | 0.00011325 | 0.0 | 0.00
Modify | 0.00056338 | 0.00057292 | 0.00058413 | 0.0 | 0.02
Other | | 0.0003092 | | | 0.01
Comm | 0.13588 | 0.24134 | 0.34722 | 20.4 | 5.55
Output | 0.00013757 | 0.00015104 | 0.00016761 | 0.0 | 0.00
Modify | 0.00087953 | 0.00091547 | 0.00095582 | 0.0 | 0.02
Other | | 0.0003656 | | | 0.01

Nlocal: 1000 ave 1000 max 1000 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Expand All @@ -97,4 +97,4 @@ Dangerous builds = 0

Please see the log.cite file for references relevant to this simulation

Total wall time: 0:00:03
Total wall time: 0:00:04
54 changes: 21 additions & 33 deletions src/MANYBODY/pair_atm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,15 @@ void PairATM::compute(int eflag, int vflag)
numneigh = list->numneigh;
firstneigh = list->firstneigh;

int count1 = 0;
int count2 = 0;
int count3 = 0;
// triple loop over local atoms and neighbors twice
// must compute each IJK triplet interaction exactly once
// by proc that owns the triplet atom with smallest x coord
// special logic to break ties if multiple atoms have same x or y coords
// inner two loops for jj=1,Jnum and kk=jj+1,Jnum insure
// the pair of other 2 non-minimum-x atoms is only considered once
// triplet geometry criteria for calculation:
// each pair distance <= cutoff
// produce of 3 pair distances <= cutoff_triple^3

for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
Expand All @@ -112,53 +118,44 @@ void PairATM::compute(int eflag, int vflag)
jnum = numneigh[i];
jnumm1 = jnum - 1;

// for (jj = 0; jj < jnumm1; jj++) {
// replace with this line:
for (jj = 0; jj < jnum; jj++) {

for (jj = 0; jj < jnumm1; jj++) {
j = jlist[jj];
j &= NEIGHMASK;

rij[0] = x[j][0] - xi;
if (rij[0] < 0.0) continue;
rij[1] = x[j][1] - yi;
if (rij[0] == 0.0 and rij[1] < 0.0) continue;
rij[2] = x[j][2] - zi;
if (rij[0] == 0.0 and rij[1] == 0.0 and rij[2] < 0.0) continue;
rij2 = rij[0]*rij[0] + rij[1]*rij[1] + rij[2]*rij[2];
count1++;
if (rij2 > cutoff_squared) continue;
count2++;

//for (kk = jj+1; kk < jnum; kk++) {
// replace with these two lines:
for (kk = 0; kk < jnum; kk++) {
if (kk == jj) continue;

for (kk = jj+1; kk < jnum; kk++) {
k = jlist[kk];
k &= NEIGHMASK;

rjk[0] = x[k][0] - x[j][0];
if (rjk[0] < 0.0) continue;
rjk[1] = x[k][1] - x[j][1];
if (rjk[0] == 0.0 and rjk[1] < 0.0) continue;
rjk[2] = x[k][2] - x[j][2];
if (rjk[0] == 0.0 and rjk[1] == 0.0 and rjk[2] < 0.0) continue;
rjk2 = rjk[0]*rjk[0] + rjk[1]*rjk[1] + rjk[2]*rjk[2];
if (rjk2 > cutoff_squared) continue;

rik[0] = x[k][0] - xi;
if (rik[0] < 0.0) continue;
rik[1] = x[k][1] - yi;
if (rik[0] == 0.0 and rik[1] < 0.0) continue;
rik[2] = x[k][2] - zi;
if (rik[0] == 0.0 and rik[1] == 0.0 and rik[2] < 0.0) continue;
rik2 = rik[0]*rik[0] + rik[1]*rik[1] + rik[2]*rik[2];
if (rik2 > cutoff_squared) continue;

rjk[0] = x[k][0] - x[j][0];
rjk[1] = x[k][1] - x[j][1];
rjk[2] = x[k][2] - x[j][2];
rjk2 = rjk[0]*rjk[0] + rjk[1]*rjk[1] + rjk[2]*rjk[2];
if (rjk2 > cutoff_squared) continue;

double r6 = rij2*rjk2*rik2;
if (r6 > cutoff_triple_sixth) continue;

nu_local = nu[type[i]][type[j]][type[k]];
if (nu_local == 0.0) continue;

count3++;
interaction_ddd(nu_local,
r6,rij2,rik2,rjk2,rij,rik,rjk,fj,fk,eflag,evdwl);

Expand All @@ -177,15 +174,6 @@ void PairATM::compute(int eflag, int vflag)
}
}

int count = count1;
MPI_Allreduce(&count,&count1,1,MPI_INT,MPI_SUM,world);
count = count2;
MPI_Allreduce(&count,&count2,1,MPI_INT,MPI_SUM,world);
count = count3;
MPI_Allreduce(&count,&count3,1,MPI_INT,MPI_SUM,world);
printf("FORCE %g %d %d %d\n",cutoff_squared,count1,count2,count3);


if (vflag_fdotr) virial_fdotr_compute();
}

Expand Down

0 comments on commit 31ed3f7

Please sign in to comment.