Dear all,
I am running umbrella sampling productions using a dihedral angle as reaction coordinate. By eye, the histograms of the angles of the umbrella productions overlap well in most of the cases. I wrote myself a script that checks for overlap using quantiles. I currently test if the 5th 5-quantile of production n is greater than the 1th 5-quantile of production n+1. I wonder if that test is too strict? How much overlap is intended/enough?
This is my python script that you can also find here: https://github.com/Gimba/UmbrellaSamplingUtils/blob/master/check_overlap.py
import glob
import matplotlib.pyplot as plt
import numpy as np
def main():
fig = plt.figure(figsize=(15, 10))
data = {}
files = []
for file in glob.glob("*.dat"):
files.append(file)
files = sorted(files, key=lambda x: int(x.split('_')[-1].split('.')[0]))
for file in files:
with open(file, 'r') as f:
file_number = int(file.split('_')[-1].split('.')[0])
data[file_number] = []
for line in f.readlines():
dat = line.split()[-1]
data[file_number].append(float(dat))
# plot and annotate histograms
y, x, _ = plt.hist(data[file_number], bins=100, histtype='step')
y_coord = max(y)
x_coord = x[np.where(y == max(y))[0][0]]
plt.annotate(str(file_number), (x_coord, y_coord), ha='center')
plt.xlabel('dihedral angle')
plt.ylabel('frequency')
# use quantiles to check proper overlap of distributions
with open('umbrella_stats.txt', 'w') as o:
o.write('file number <-> filenumber : sufficient overlap\n')
for key in sorted(data.keys()):
key_next = key + 1
if key_next in data.keys():
dat = np.asarray(data[key])
dat_next = np.asarray(data[key_next])
dat_quantile = np.quantile(dat, 0.8)
dat_next_quantile = np.quantile(dat_next, 0.2)
# plt.axvline(dat_quantile, color='red')
# plt.axvline(dat_next_quantile)
if (dat_quantile > dat_next_quantile):
o.write('{} <-> {} : True\n'.format(key, key_next))
else:
print('!!! {} <-> {} : False\n'.format(key, key_next))
o.write('{} <-> {} : False\n'.format(key, key_next))
plt.savefig('histograms.png')
if __name__ == '__main__':
main()
-------------------------------------------------------------------------
Martin Rosellen
Phd, UCL Division of Biosciences
Hansen Lab (UCL), Shepherd Group (Birkbeck)
UCL (University College London)
Birkbeck, University of London
E: martin.rosellen.16.ucl.ac.uk<mailto:martin.rosellen.16.ucl.ac.uk>
_______________________________________________
AMBER mailing list
AMBER.ambermd.org
http://lists.ambermd.org/mailman/listinfo/amber
Received on Mon Jan 06 2020 - 04:30:02 PST