Add apparently working bitcell overlap detection and CBMC cell neighb…

…or list. Members of gathered overlap cells and cell neighbor lists are now filtered by proximity. The CBMC cell list option would now be more appropriately called a cell neighbor list method, since the possible neighbors for a cell are now gathered and filtered by proximity. CBMC cells are now the same size as overlap cells; the gathering algorithm just searches more cells to capture all possible neighbors. Trial insertions of the first fragment in CBMC are now largely vectorized. CBMC dihedral trials are not yet vectorized, but applying vectorization and bitcell overlap detection to dihedral trials should be fairly straightforward. Dimension padding currently assumes a vector size no greater than 256 bits (the size of AVX2 vector registers); if we want Cassandra to support AVX-512, changes need to be made to accommodate that, since it would violate the alignment assumptions made in some ifort compiler directives. While intermolecular CBMC energy estimation is vectorized when used with CBMC cell neighbor lists, it can apparently sometimes still be slightly slower than directly computing the energy, most likely due to slower memory access for the very large, precomputed energy table. I still left it as an option, though, because it may be faster for more expensive force fields. Some cheap WRITE statements used for debugging are still present in the code and should probably be removed to avoid excessive verbosity, especially to STDOUT. Repeating an old simulation (from before this commit) using the same seeds and simulation options will not give identical results, even with a single thread, because CBMC insertion trial positions are now calculated from the random numbers differently than before; for example, rranf() - 0.5 is now used instead of 0.5 - rranf() as the fractional COM coordinate.
Restricted insertion trial coordinates are now generated within the inner volume the first time, rather than being generated anywhere in the box and then regenerated within the inner volume if they fall outside it, as was done previously; this process is now vectorized. Widom insertions will no longer ever be restricted, even if the inserted species is designated for restricted GCMC insertions. It's likely this was never a problem for anyone, but this fix should make sure it won't be a problem in the future. If restricted Widom insertions are ever allowed in the future, additional changes will need to be made for them to be done properly.
MaginnGroup · rwsmith7531 · Jul 21, 2022 · Jul 21, 2022 · Jul 21, 2022 · Aug 10, 2022
commit c11e3898f83cb7dcab1345538c72406cb8456b97
diff --git a/Src/atompair_nrg_table_routines.f90 b/Src/atompair_nrg_table_routines.f90
@@ -127,7 +127,8 @@ SUBROUTINE Allocate_Atompair_tables
                 wsolute_maxind = wsolute_nextbase
                 IF (precalc_atompair_nrg) THEN
                         ALLOCATE(typepair_nrg_table(atompair_nrg_res,0:solvent_ntypes,0:solute_ntypes,nbr_boxes))
-                        ALLOCATE(atompair_nrg_table(atompair_nrg_res,solvent_nextbase,solute_nextbase,nbr_boxes))
+                        ALLOCATE(atompair_nrg_table(atompair_nrg_res+1,solvent_nextbase,solute_nextbase,nbr_boxes))
+                        ALLOCATE(atompair_nrg_table_reduced(0:(atompair_nrg_res+1)*solvent_nextbase-1,solute_nextbase,nbr_boxes))
                         typepair_nrg_table = 0.0_DP
                 END IF
                 IF (est_atompair_rminsq) THEN
@@ -163,6 +164,8 @@ SUBROUTINE Create_Atompair_Nrg_table
                 nsolutes = 0
                 nsolvents = 0
                 rsq_step = (MAXVAL(rcut_cbmcsq)-rcut_lowsq)/atompair_nrg_res
+                inv_rsq_step = 1.0_DP/rsq_step
+                inv_rsq_step_sp = REAL(inv_rsq_step,SP)
                 rsq_shifter = rcut_lowsq - rsq_step
                 DO i = 1, atompair_nrg_res
                         rsq_lb_vector(i) = rsq_shifter + rsq_step*i
@@ -241,24 +244,29 @@ SUBROUTINE Create_Atompair_Nrg_table
                 !$OMP END PARALLEL
 
                 !$OMP WORKSHARE
-                atompair_nrg_table = typepair_nrg_table(:,solvent_typeindvec,solute_typeindvec,:)
+                atompair_nrg_table(1:atompair_nrg_res,:,:,:) = typepair_nrg_table(:,solvent_typeindvec,solute_typeindvec,:)
                 !$OMP END WORKSHARE
 
                 !$OMP PARALLEL DEFAULT(SHARED)
                 !$OMP DO COLLAPSE(3) SCHEDULE(STATIC)
                 DO ibox = 1, nbr_boxes
                         DO ti_solute = 1, solute_maxind
                                 DO ti_solvent = 1, solvent_maxind
-                                        atompair_nrg_table(:,ti_solvent,ti_solute,ibox) = &
-                                                atompair_nrg_table(:,ti_solvent,ti_solute,ibox) + &
+                                        atompair_nrg_table(1:atompair_nrg_res,ti_solvent,ti_solute,ibox) = &
+                                                atompair_nrg_table(1:atompair_nrg_res,ti_solvent,ti_solute,ibox) + &
                                                 f2(:,ibox)*cfqq(ti_solvent,ti_solute)
                                 END DO
                         END DO
                 END DO
                 !$OMP END DO
+                !$OMP WORKSHARE
+                atompair_nrg_table(atompair_nrg_res+1,:,:,:) = 0.0
+                atompair_nrg_table_reduced = REAL(RESHAPE(atompair_nrg_table, SHAPE(atompair_nrg_table_reduced)),SP)
+                !$OMP END WORKSHARE
                 !$OMP END PARALLEL
 
 
+
         END SUBROUTINE Create_Atompair_Nrg_table
 
         SUBROUTINE Setup_Atompair_tables

diff --git a/Src/create_nonbond_table.f90 b/Src/create_nonbond_table.f90
@@ -76,6 +76,7 @@ SUBROUTINE Create_Nonbond_Table
   REAL(DP) :: sixbycut, eps, sigma, negsigsq, negsigbyr2, rterm, rterm2
 
 !******************************************************************************
+  l_zerotype_present = .FALSE.
   IF (verbose_log) THEN
      WRITE(logunit,*)
      WRITE(logunit,'(A)') 'Nonbond tables'
@@ -145,6 +146,7 @@ SUBROUTINE Create_Nonbond_Table
         ELSE
            ! atom has no atom_type
            nonbond_list(ia,is)%atom_type_number = 0
+           l_zerotype_present = .TRUE.
         ENDIF
         ! Get maximum and minimum charge for atom type
         IF (repeat_type) THEN
@@ -216,6 +218,8 @@ SUBROUTINE Create_Nonbond_Table
   ALLOCATE(vdw_param5_table(0:nbr_atomtypes,0:nbr_atomtypes), Stat=AllocateStatus)
   ALLOCATE(ppvdwp_table(0:nbr_atomtypes,0:nbr_atomtypes,5,nbr_boxes))
   ALLOCATE(ppvdwp_table2(5,0:nbr_atomtypes,0:nbr_atomtypes,nbr_boxes))
+  ALLOCATE(ppvdwp_table_sp(0:nbr_atomtypes,0:nbr_atomtypes,5,nbr_boxes))
+  ALLOCATE(ppvdwp_table2_sp(5,0:nbr_atomtypes,0:nbr_atomtypes,nbr_boxes))
 
   IF (AllocateStatus .NE. 0) THEN
      err_msg = ''
@@ -690,6 +694,8 @@ SUBROUTINE Create_Nonbond_Table
                         vdw_param2_table ** vdw_param4_table
                 ppvdwp_table(:,:,1,ibox) = ppvdwp_table(:,:,1,ibox) * &
                         vdw_param2_table ** vdw_param3_table
+                l_nonuniform_exponents = ANY(vdw_param3_table(1:,1:) .NE. vdw_param3_table(1,1)) &
+                        .OR. ANY (vdw_param4_table(1:,1:) .NE. vdw_param4_table(1,1))
                 ppvdwp_table(:,:,3,ibox) = vdw_param3_table * -0.5_DP
                 ppvdwp_table(:,:,4,ibox) = vdw_param4_table * -0.5_DP
                 IF (int_vdw_sum_style(ibox) == vdw_cut_shift) THEN
@@ -709,8 +715,55 @@ SUBROUTINE Create_Nonbond_Table
   !shape2 = shape1(order2) ! wrong
 
   ppvdwp_table2 = RESHAPE(ppvdwp_table, SHAPE(ppvdwp_table2), ORDER=order2)
-
-  max_rmin = DSQRT(MAXVAL(rminsq_table))
-  sp_rminsq_table = REAL(rminsq_table,SP)
+  ppvdwp_table2_sp = REAL(ppvdwp_table2,SP)
+  ppvdwp_table_sp = REAL(ppvdwp_table,SP)
+
+  IF (calc_rmin_flag) THEN
+          max_rmin = DSQRT(MAXVAL(rminsq_table))
+          sp_rminsq_table = REAL(rminsq_table,SP)
+          ALLOCATE(atomtype_max_rminsq(0:nbr_atomtypes))
+          ALLOCATE(atomtype_min_rminsq(0:nbr_atomtypes))
+          ALLOCATE(atomtype_max_rminsq_sp(0:nbr_atomtypes))
+          atomtype_max_rminsq = MAXVAL(rminsq_table(:, &
+                  which_true_from_zero(l_wsolute_atomtype(),nbr_atomtypes+1)),2)
+          atomtype_min_rminsq = MINVAL(rminsq_table(:, &
+                  which_true_from_zero(l_wsolute_atomtype(),nbr_atomtypes+1)),2)
+          atomtype_max_rminsq_sp = REAL(atomtype_max_rminsq,SP)
+          box_list%ideal_bitcell_length = SQRT(MAXVAL(atomtype_min_rminsq)) / 28.0_DP ! RHS scalar LHS vector with one element per box
+          solvents_or_types_maxind = nbr_atomtypes+1
+  ELSE
+          box_list%ideal_bitcell_length = rcut_lowsq / 28.0_DP
+          solvents_or_types_maxind = 0
+  END IF
+  CONTAINS
+          FUNCTION l_wsolute_atomtype()
+                  ! Flags every atom type number (0 = untyped) used by a species whose l_wsolute flag is set.
+                  LOGICAL, DIMENSION(0:nbr_atomtypes) :: l_wsolute_atomtype
+                  INTEGER :: is, ia ! ia is local so the loop below cannot clobber the host's ia
+                  l_wsolute_atomtype = .FALSE.
+                  DO is = 1, nspecies
+                          IF (.NOT. species_list(is)%l_wsolute) CYCLE
+                          DO ia = 1, natoms(is)
+                                  l_wsolute_atomtype(nonbond_list(ia,is)%atom_type_number) = .TRUE.
+                          END DO
+                  END DO
+          END FUNCTION l_wsolute_atomtype
+          FUNCTION which_true_from_zero(lvec,nl)
+                  ! Returns, in ascending order, the zero-based positions of the
+                  ! .TRUE. elements of lvec.
+                  !   lvec : logical mask, indexed 0:nl-1
+                  !   nl   : number of elements in lvec
+                  ! The result holds COUNT(lvec) entries, so it is zero-sized when
+                  ! no element of lvec is .TRUE.
+                  INTEGER :: nl
+                  LOGICAL, DIMENSION(0:nl-1) :: lvec
+                  INTEGER, DIMENSION(COUNT(lvec)) :: which_true_from_zero
+                  INTEGER :: pos
+                  ! The implied DO lists the candidate positions 0..nl-1; PACK
+                  ! then keeps, in array-element order, exactly those positions
+                  ! whose corresponding entry of lvec is .TRUE., which matches
+                  ! the order in which a front-to-back scan would find them.
+                  which_true_from_zero = PACK((/ (pos, pos = 0, nl-1) /), lvec)
+          END FUNCTION which_true_from_zero
 
 END SUBROUTINE Create_Nonbond_Table