Merge pull request MobleyLab#104 from shuail/master

add few options for TI analysis and update the README file
jpotoff · Sep 21, 2017 · ccd01b4 · ccd01b4
2 parents 38bb6a0 + d8ce96e
commit ccd01b4
Show file tree

Hide file tree

Showing 3 changed files with 47 additions and 17 deletions.
diff --git a/README.md b/README.md
@@ -53,24 +53,33 @@ Help for `alchemical_analysis.py` (obtained with `alchemical_analysis -h`) is:
   -h, --help            show this help message and exit
   -a SOFTWARE, --software=SOFTWARE
                         Package's name the data files come from: Gromacs,
-                        Sire, or AMBER. Default: Gromacs.
+                        Sire, Desmond, or AMBER. Default: Gromacs.
+  -b FRACTION, --fraction=FRACTION
+                        The fraction of the energy file will be used to
+                        calculate the statistics. Default: 1.0
   -c, --cfm             The Curve-Fitting-Method-based consistency inspector.
                         Default: False.
   -d DATAFILE_DIRECTORY, --dir=DATAFILE_DIRECTORY
                         Directory in which data files are stored. Default:
                         Current directory.
+  -e, --backward        Extract the energy data from the backward direction.
+                        Default: False
   -f BFORWREV, --forwrev=BFORWREV
-                        Plotting the free energy change as a function of time
-                        in both directions. The number of time points (an
-                        integer) is to be followed the flag. Default: 0
-  -g, --breakdown       Plotting the free energy differences evaluated for
-                        each pair of adjacent states for all methods. Default:
-                        False.
+                        Plot the free energy change as a function of time in
+                        both directions, with the specified number of points
+                        in the time plot. The number of time points (an
+                        integer) must be provided. Default: 0
+  -g, --breakdown       Plot the free energy differences evaluated for each
+                        pair of adjacent states for all methods, including the
+                        dH/dlambda curve for TI. Default: False.
   -i UNCORR_THRESHOLD, --threshold=UNCORR_THRESHOLD
-                        Perform the analysis with rather all the data if the
-                        number of uncorrelated samples is found to be less
-                        than this number. If 0 is given, the time series
-                        analysis will not be performed at all. Default: 50.
+                        Proceed with correlated samples if the number of
+                        uncorrelated samples is found to be less than this
+                        number. If 0 is given, the time series analysis will
+                        not be performed at all. Default: 50.
+  -j RESULTFILENAME, --resultfilename=RESULTFILENAME
+                        custom defined result filename prefix. Default:
+                        results
   -k BSKIPLAMBDAINDEX, --koff=BSKIPLAMBDAINDEX
                         Give a string of lambda indices separated by '-' and
                         they will be removed from the analysis. (Another
@@ -86,6 +95,14 @@ Help for `alchemical_analysis.py` (obtained with `alchemical_analysis -h`) is:
                         the other hand, will call [TI-CUBIC, GDEL]. 'all'
                         calls the full list of supported methods [TI, TI-
                         CUBIC, DEXP, IEXP, GINS, GDEL, BAR, UBAR, RBAR, MBAR].
+  -n UNCORR, --uncorr=UNCORR
+                        The observable to be used for the autocorrelation
+                        analysis; either 'dhdl_all' (obtained as a sum over
+                        all energy components) or 'dhdl' (obtained as a sum
+                        over those energy components that are changing;
+                        default) or 'dE'. In the latter case the energy
+                        differences dE_{i,i+1} (dE_{i,i-1} for the last
+                        lambda) are used.
   -o OUTPUT_DIRECTORY, --out=OUTPUT_DIRECTORY
                         Directory in which the output files produced by this
                         script will be stored. Default: Same as
@@ -117,7 +134,7 @@ Help for `alchemical_analysis.py` (obtained with `alchemical_analysis -h`) is:
                         BAR and MBAR. Default: 1e-10.
   -z INIT_WITH, --initialize=INIT_WITH
                         The initial MBAR free energy guess; either 'BAR' or
-                        'zeroes'. Default: 'BAR'.
+                        'zeros'. Default: 'BAR'.
 ```
 
 

diff --git a/alchemical_analysis/alchemical_analysis.py b/alchemical_analysis/alchemical_analysis.py
@@ -41,11 +41,14 @@
 
 parser = OptionParser()
 parser.add_option('-a', '--software', dest = 'software', help = 'Package\'s name the data files come from: Gromacs, Sire, Desmond, or AMBER. Default: Gromacs.', default = 'Gromacs')
+parser.add_option('-b', '--fraction', dest = 'fraction', help = 'The fraction of the energy file will be used to calculate the statistics. Default: 1.0',  default = 1.0, type = float)
 parser.add_option('-c', '--cfm', dest = 'bCFM', help = 'The Curve-Fitting-Method-based consistency inspector. Default: False.', default = False, action = 'store_true')
 parser.add_option('-d', '--dir', dest = 'datafile_directory', help = 'Directory in which data files are stored. Default: Current directory.', default = '.')
+parser.add_option('-e', '--backward', dest = 'backward', help = 'Extract the energy data from the backward direction. Default: False', default = False, action = 'store_true')
 parser.add_option('-f', '--forwrev', dest = 'bForwrev', help = 'Plot the free energy change as a function of time in both directions, with the specified number of points in the time plot. The number of time points (an integer) must be provided. Default: 0', default = 0, type=int)
 parser.add_option('-g', '--breakdown', dest = 'breakdown', help = 'Plot the free energy differences evaluated for each pair of adjacent states for all methods, including the dH/dlambda curve for TI. Default: False.', default = False, action = 'store_true')
 parser.add_option('-i', '--threshold', dest = 'uncorr_threshold', help = 'Proceed with correlated samples if the number of uncorrelated samples is found to be less than this number. If 0 is given, the time series analysis will not be performed at all. Default: 50.', default = 50, type=int)
+parser.add_option('-j', '--resultfilename', dest = 'resultfilename', help = 'custom defined result filename prefix. Default: results', default = 'results')
 parser.add_option('-k', '--koff', dest = 'bSkipLambdaIndex', help = 'Give a string of lambda indices separated by \'-\' and they will be removed from the analysis. (Another approach is to have only the files of interest present in the directory). Default: None.', default = '')
 parser.add_option('-m', '--methods', dest = 'methods', help = 'A list of the methods to esitimate the free energy with. Default: [TI, TI-CUBIC, DEXP, IEXP, BAR, MBAR]. To add/remove methods to the above list provide a string formed of the method strings preceded with +/-. For example, \'-ti_cubic+gdel\' will turn methods into [TI, DEXP, IEXP, BAR, MBAR, GDEL]. \'ti_cubic+gdel\', on the other hand, will call [TI-CUBIC, GDEL]. \'all\' calls the full list of supported methods [TI, TI-CUBIC, DEXP, IEXP, GINS, GDEL, BAR, UBAR, RBAR, MBAR].', default = '')
 parser.add_option('-n', '--uncorr', dest = 'uncorr', help = 'The observable to be used for the autocorrelation analysis; either \'dhdl_all\' (obtained as a sum over all energy components) or \'dhdl\' (obtained as a sum over those energy components that are changing; default) or \'dE\'. In the latter case the energy differences dE_{i,i+1} (dE_{i,i-1} for the last lambda) are used.', default = 'dhdl')
@@ -441,7 +444,6 @@ def TIprelim(lv):
    for k in range(K):
       ave_dhdl[k,:] = P.beta*numpy.average(dhdl[k,:,0:N_k[k]],axis=1)
       std_dhdl[k,:] = P.beta*numpy.std(dhdl[k,:,0:N_k[k]],axis=1)/numpy.sqrt(N_k[k]-1)
-
    return dlam, ave_dhdl, std_dhdl
 
 def getSplines(lchange):
@@ -639,6 +641,7 @@ def totalEnergies():
                      h = numpy.trim_zeros(dlam[:,j])
                      wsum = 0.5*(numpy.append(h,0) + numpy.append(0,h))
                      ddF[name] += numpy.dot(wsum**2,std_dhdl[lj,j]**2)
+
             ddF[name] = numpy.sqrt(ddF[name])
 
          else:
@@ -699,7 +702,7 @@ def printLine(str1, str2, d1=None, d2=None):
    else:
       printLine('%9s:  ' % segments[-1], str_dat, dFs[-1], ddFs[-1])
    # Store results.
-   outfile = open(os.path.join(P.output_directory, 'results.txt'), 'w')
+   outfile = open(os.path.join(P.output_directory, '%s.txt'%P.resultfilename), 'w')
    outfile.write('# Command line was: %s\n\n' % ' '.join(sys.argv) )
    outfile.writelines(outtext)
    outfile.close()
@@ -711,13 +714,13 @@ def printLine(str1, str2, d1=None, d2=None):
    P.ddFs     = ddFs
    P.dFs      = dFs
 
-   outfile = open(os.path.join(P.output_directory, 'results.pickle'), 'w')
+   outfile = open(os.path.join(P.output_directory, '%s.pickle'%P.resultfilename), 'w')
    pickle.dump(P, outfile)
    outfile.close()
 
    print '\n'+w*'*'
-   for i in [" The above table has been stored in ", " "+P.output_directory+"/results.txt ",
-      " while the full-precision data ", " (along with the simulation profile) in ", " "+P.output_directory+"/results.pickle "]:
+   for i in [" The above table has been stored in ", " "+P.output_directory+"/%s.txt "%P.resultfilename,
+      " while the full-precision data ", " (along with the simulation profile) in ", " "+P.output_directory+"/%s.pickle "%P.resultfilename]:
       print str_align.format('{:^40}'.format(i))
    print w*'*'
 
@@ -1244,6 +1247,7 @@ def main():
    #NML: Check for all zeros in data files
    #sliu: change the all zero check to let the calculation continue if there is only dhdlt. 
    all_zeros = not numpy.any(dhdlt) and not numpy.any(u_klt)
+
    if all_zeros == True:
       print "WARNING: Found all 0 in input data."
       zero_output(K,P)

diff --git a/alchemical_analysis/parser_amber.py b/alchemical_analysis/parser_amber.py
@@ -624,6 +624,7 @@ def readDataAmber(P):
     # FIXME: compute maximum number of MBAR energy sections
     K = len(lvals)
     nsnapshots = [len(dvdl_all[clambda]) - start_from for clambda in lvals]
+    nsnapshots = [int(i*P.fraction) for i in nsnapshots]
     maxn = max(nsnapshots)
 
     # AMBER has currently only one global lambda value, hence 2nd dim = 1
@@ -646,10 +647,18 @@ def readDataAmber(P):
         raise SystemExit('ERROR: unknown units %s' % P.units)
 
     ave = []
+    std = []
 
     for i, clambda in enumerate(lvals):
         vals = dvdl_all[clambda][start_from:]
+        if P.backward:
+            end_position = len(vals)-int(P.fraction*len(vals))
+            vals = vals[end_position:]
+        else:
+            end_position = int(len(vals)*P.fraction)
+            vals = vals[:end_position]
         ave.append(np.average(vals) * Econv)
+        std.append(np.std(vals) * Econv)
 
         dhdlt[i][0][:len(vals)] = np.array(vals)