#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2019 - 2024 Louis Vanduyfhuys <Louis.Vanduyfhuys@UGent.be>
# Center for Molecular Modeling (CMM), Ghent University, Ghent, Belgium;
# all rights reserved unless otherwise stated.
#
# This file is part of a library developed by Louis Vanduyfhuys at
# the Center for Molecular Modeling under supervision of prof. Veronique
# Van Speybroeck. Usage of this package should be authorized by
# prof. Vanduyfhuys or prof. Van Speybroeck.
from ..units import *
from ..constants import *
import matplotlib.pyplot as pp
import matplotlib.cm as cm
import numpy as np
np.seterr(divide='ignore', invalid='ignore')
import time
from thermolib.thermodynamics.fep import BaseProfile, BaseFreeEnergyProfile, FreeEnergySurface2D #FreeEnergySurface2D is needed by Histogram2D.from_fes
from thermolib.ext import wham1d_hs, wham1d_bias, wham1d_scf, wham1d_error, wham2d_hs, wham2d_bias, wham2d_scf, wham2d_error
from thermolib.error import GaussianDistribution, LogGaussianDistribution, Propagator, MultiGaussianDistribution, MultiLogGaussianDistribution
from thermolib.tools import fisher_matrix_mle_probdens, invert_fisher_to_covariance
__all__ = [
'Histogram1D', 'Histogram2D', 'plot_histograms'
]
class Histogram1D(object):
'''
Class representing a 1D probability histogram :math:`p(CV)` in terms of a single collective variable :math:`CV`.
'''
def __init__(self, cvs, ps, error=None, cv_output_unit='au', cv_label='CV'):
'''
:param cvs: the bin values of the collective variable CV, which should be in atomic units!
:type cvs: np.ndarray
:param ps: the histogram probability values at the given CV values. The probabilities should be in atomic units!
:type ps: np.ndarray
:param error: error distribution on the free energy profile
:type error: child of :py:class:`Distribution <thermolib.error.Distribution>`, optional, default=None
:param cv_output_unit: the units for printing and plotting of CV values (not the unit of the input array, that is assumed to be in atomic units).
:type cv_output_unit: str, optional, default='au'
:param cv_label: label for the CV for printing and plotting
:type cv_label: str, optional, default='CV'
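Example: a minimal sketch constructing a normalized histogram from scratch; it assumes ``parse_unit`` is importable from ``thermolib.units`` and that 'A' (angstrom) is a recognized unit string::

    import numpy as np
    from thermolib.units import parse_unit
    cvs = np.linspace(1.0, 5.0, 41)*parse_unit('A') #bin centers, converted to atomic units
    ps = np.exp(-0.5*((cvs - cvs.mean())/cvs.std())**2)
    ps /= ps.sum() #normalize the probabilities
    hist = Histogram1D(cvs, ps, cv_output_unit='A', cv_label='distance')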
'''
self.cvs = cvs.copy()
self.ps = ps.copy()
self.error = None
if error is not None:
self.error = error.copy()
self.cv_output_unit = cv_output_unit
self.cv_label = cv_label
def plower(self, nsigma=2):
'''
Return the lower limit of an n-sigma error bar on the histogram probability, i.e. :math:`\\mu - n\\sigma` with :math:`\\mu` the mean and :math:`\\sigma` the standard deviation.
:param nsigma: defines the n-sigma error bar
:type nsigma: int, optional, default=2
:return: the lower limit of the n-sigma error bar
:rtype: np.ndarray with dimensions determined by self.error
:raises AssertionError: if self.error is not defined.
'''
assert self.error is not None, 'Plower cannot be computed because no error distribution was defined in the error attribute'
plower, pupper = self.error.nsigma_conf_int(nsigma)
return plower
def pupper(self, nsigma=2):
'''
Return the upper limit of an n-sigma error bar on the histogram probability, i.e. :math:`\\mu + n\\sigma` with :math:`\\mu` the mean and :math:`\\sigma` the standard deviation.
:param nsigma: defines the n-sigma error bar
:type nsigma: int, optional, default=2
:return: the upper limit of the n-sigma error bar
:rtype: np.ndarray with dimensions determined by self.error
:raises AssertionError: if self.error is not defined.
'''
assert self.error is not None, 'Pupper cannot be computed because no error distribution was defined in the error attribute'
plower, pupper = self.error.nsigma_conf_int(nsigma)
return pupper
def copy(self):
'''
Make and return a copy of the current Histogram1D instance.
'''
if self.error is not None:
error = self.error.copy()
else:
error = None
return Histogram1D(self.cvs.copy(), self.ps.copy(), error=error, cv_output_unit=self.cv_output_unit, cv_label=self.cv_label)
@classmethod
def from_average(cls, histograms, error_estimate=None, cv_output_unit=None, cv_label=None):
'''
Start from a set of 1D histograms and compute and return the averaged histogram (and optionally the error bar).
:param histograms: set of histograms to be averaged
:type histograms: list of :py:class:`Histogram1D <thermolib.thermodynamics.histogram.Histogram1D>` instances
:param error_estimate: indicate how to perform error analysis. Currently, only one method is supported, 'std', which will compute the error bar from the standard deviation within the set of histograms.
:type error_estimate: str, optional, default=None
:param cv_output_unit: the unit in which cv will be plotted/printed. Defaults to the cv_output_unit of the first histogram given.
:type cv_output_unit: str, optional
:param cv_label: label for the new CV. Defaults to the cv_label of the first given histogram.
:type cv_label: str, optional
:raises AssertionError: if the histograms do not have a consistent CV grid
:raises AssertionError: if the histograms do not have a consistent CV label
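Example: a minimal sketch, assuming ``hist0`` and ``hist1`` are existing Histogram1D instances defined on the same CV grid::

    avg = Histogram1D.from_average([hist0, hist1], error_estimate='std')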
'''
#sanity checks
cvs = None
for hist in histograms:
if cvs is None:
cvs = hist.cvs.copy()
else:
assert (abs(cvs-hist.cvs)<1e-12*np.abs(cvs.mean())).all(), 'Cannot average the histograms as they do not have consistent CV grids'
if cv_output_unit is None:
cv_output_unit = hist.cv_output_unit
if cv_label is None:
cv_label = hist.cv_label
else:
assert cv_label==hist.cv_label, 'Inconsistent CV label definition in histograms'
#collect probability distributions
pss = np.array([hist.ps for hist in histograms])
#average histograms
ps = pss.mean(axis=0)
#compute error if requested
error = None
if error_estimate is not None and error_estimate.lower() in ['std']:
perr = pss.std(axis=0,ddof=1)/np.sqrt(len(histograms))
error = GaussianDistribution(ps, perr)
return cls(cvs, ps,error=error, cv_output_unit=cv_output_unit, cv_label=cv_label)
@classmethod
def from_single_trajectory(cls, data, bins, error_estimate=None, error_p_threshold=0.0, cv_output_unit='au', cv_label='CV'):
'''
Routine to estimate a 1D probability histogram in terms of a single collective variable from a series of samples of that collective variable.
:param data: series of samples of the collective variable. Should be in atomic units!
:type data: np.ndarray
:param bins: the edges of the bins for which a histogram will be constructed. This argument is passed to `the numpy.histogram routine <https://numpy.org/doc/stable/reference/generated/numpy.histogram.html>`_. Hence, more information on its meaning and allowed values can be found there.
:type bins: np.ndarray
:param error_estimate: indicate if and how to perform error analysis. One of the following options is available:
- **mle_p** - Estimate the error directly for the probability of each bin in the histogram. This method does not explicitly impose the positivity of the probability.
- **mle_p_cov** - Estimate the full covariance matrix for the probability of all bins in the histogram. In other words, apart from the error on the probability/free energy of a bin itself, we now also account for the covariance between the probability/free energy of different bins. This method does not explicitly impose the positivity of the probability.
- **mle_f** - Estimate the error for minus the logarithm of the probability, which is proportional to the free energy (hence the f in mle_f). As the probability is expressed as :math:`\propto e^{-f}`, its positivity is explicitly accounted for.
- **mle_f_cov** - Estimate the full covariance matrix for minus the logarithm of the probability of all bins in the histogram. In other words, apart from the error on the probability/free energy of a bin itself (with the positivity constraint explicitly imposed), we now also account for the covariance between the probability/free energy of different bins.
:type error_estimate: str or None, optional, default=None
:param error_p_threshold: only relevant when error estimation is enabled (see parameter ``error_estimate``). When ``error_p_threshold`` is set to x, bins in the histogram for which the probability resulting from the trajectory is smaller than x will be disabled for error estimation (i.e. their error will be set to np.nan). This is similar to the error_p_threshold keyword of the from_wham routine, whose use is illustrated in :doc:`one of the tutorial notebooks <tut/advanced_projection>`.
:type error_p_threshold: float, optional, default=0.0
:param cv_output_unit: the unit in which cv will be plotted/printed (not the unit of the input array, that is assumed to be atomic units).
:type cv_output_unit: str, optional, default='au'
:param cv_label: label of the cv that will be used for plotting and printing
:type cv_label: str, optional, default='CV'
:raises ValueError: if no valid error_estimate value is given
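Example: a minimal sketch with synthetic samples; in a real application ``data`` would hold the CV values sampled along a simulation trajectory (in atomic units)::

    import numpy as np
    data = np.random.normal(0.0, 1.0, 10000) #synthetic CV samples
    bins = np.linspace(-4.0, 4.0, 81) #81 edges define 80 bins
    hist = Histogram1D.from_single_trajectory(data, bins, error_estimate='mle_f')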
'''
#initialization
cvs = None
ps = None
Ntot = len(data)
#generate histogram using numpy.histogram routine
ns, bin_edges = np.histogram(data, bins=bins, density=False)
cvs = 0.5*(bin_edges[:-1]+bin_edges[1:]) # bin centers
ps = ns/Ntot
#estimate of upper and lower boundary of n-sigma confidence interval
error = None
if error_estimate=='mle_p':
perr = np.sqrt(ps*(1-ps)/Ntot)
error = GaussianDistribution(ps, perr)
elif error_estimate=='mle_f':
#we first compute the error bar interval on f=-log(p) and then transform it to one on p itself.
fs = -np.log(ps)
ferr = np.sqrt((np.exp(fs)-1)/Ntot)
error = LogGaussianDistribution(-fs, ferr)
elif error_estimate=='mle_p_cov':
F = fisher_matrix_mle_probdens(ps, method='mle_p_cov')
cov = invert_fisher_to_covariance(F, ps, threshold=error_p_threshold)
error = MultiGaussianDistribution(ps, cov)
elif error_estimate=='mle_f_cov':
fs = -np.log(ps)
F = fisher_matrix_mle_probdens(ps, method='mle_f_cov')
cov = invert_fisher_to_covariance(F, ps, threshold=error_p_threshold)
error = MultiLogGaussianDistribution(-fs, cov)
elif error_estimate is not None:
raise ValueError('Invalid value for error_estimate argument, received %s. Check documentation for allowed values.' %error_estimate)
return cls(cvs, ps, error=error, cv_output_unit=cv_output_unit, cv_label=cv_label)
@classmethod
def from_wham(cls, bins, traj_input, biasses, temp, error_estimate=None, corrtimes=None, error_p_threshold=0.0, bias_subgrid_num=20, Nscf=1000, convergence=1e-6, bias_thress=1e-3, cv_output_unit='au', cv_label='CV', verbosity='low'):
'''
Routine that implements the Weighted Histogram Analysis Method (WHAM) for reconstructing the overall 1D probability histogram in terms of collective variable CV from a series of molecular simulations that are (possibly) biased in terms of CV.
:param bins: number of bins for the CV grid or an array representing the bin edges of the CV grid.
:type bins: int or np.ndarray(float)
:param traj_input: list of CV trajectories, one for each simulation. This input can be generated using the :py:meth:`read_wham_input <thermolib.tools.read_wham_input>` routine. The arguments traj_input and biasses should be of the same length.
:type traj_input: list of np.ndarrays
:param biasses: list of bias potentials, one for each simulation allowing to compute the bias at a given value of the collective variable in that simulation. This input can be generated using the :py:meth:`read_wham_input <thermolib.tools.read_wham_input>` routine. The arguments traj_input and biasses should be of the same length.
:type biasses: list(callable)
:param temp: the temperature at which all simulations were performed
:type temp: float
:param error_estimate: indicate if and how to perform error analysis. One of the following options is available:
- **mle_p** - Estimate the error directly for the probability of each bin in the histogram. This method does not explicitly impose the positivity of the probability.
- **mle_p_cov** - Estimate the full covariance matrix for the probability of all bins in the histogram. In other words, apart from the error on the probability/free energy of a bin itself, we now also account for the covariance between the probability/free energy of different bins. This method does not explicitly impose the positivity of the probability.
- **mle_f** - Estimate the error for minus the logarithm of the probability, which is proportional to the free energy (hence the f in mle_f). As the probability is expressed as :math:`\propto e^{-f}`, its positivity is explicitly accounted for.
- **mle_f_cov** - Estimate the full covariance matrix for minus the logarithm of the probability of all bins in the histogram. In other words, apart from the error on the probability/free energy of a bin itself (with the positivity constraint explicitly imposed), we now also account for the covariance between the probability/free energy of different bins.
:type error_estimate: str or None, optional, default=None
:param corrtimes: list of (integrated) correlation times of the CV, one for each simulation. Such correlation times will be taken into account during the error estimation and hence make it more reliable. If set to None, the CV trajectories will be assumed to contain fully uncorrelated samples (which is not true when using trajectories representing each subsequent step from a molecular dynamics simulation). More information can be found in :ref:`the user guide <seclab_ug_errorestimation>`. This input can be generated using the :py:meth:`decorrelate <thermolib.tools.decorrelate>` routine. This argument needs to have the same length as the ``traj_input`` and ``biasses`` arguments.
:type corrtimes: list or np.ndarray, optional, default=None
:param error_p_threshold: only relevant when error estimation is enabled (see parameter ``error_estimate``). When ``error_p_threshold`` is set to x, bins in the histogram for which the probability resulting from the trajectory is smaller than x will be disabled for error estimation (i.e. their error will be set to np.nan). It is mainly useful in the case of 2D histograms, as illustrated in :doc:`one of the tutorial notebooks <tut/advanced_projection>`.
:type error_p_threshold: float, optional, default=0.0
:param bias_subgrid_num: see documentation for this argument in the :py:meth:`wham1d_bias <thermolib.ext.wham1d_bias>` routine
:type bias_subgrid_num: int, optional, default=20
:param Nscf: the maximum number of steps in the self-consistent loop to solve the WHAM equations
:type Nscf: int, optional, default=1000
:param convergence: convergence criterion for the WHAM self-consistent solver. The SCF loop will stop whenever the integrated absolute difference between consecutive probability densities is less than the specified value.
:type convergence: float, optional, default=1e-6
:param bias_thress: see documentation for the threshold argument in the :py:meth:`wham1d_bias <thermolib.ext.wham1d_bias>` routine
:type bias_thress: float, optional, default=1e-3
:param verbosity: controls the level of verbosity for logging during the WHAM algorithm.
:type verbosity: str, optional, allowed='none'|'low'|'medium'|'high', default='low'
:param cv_output_unit: the unit in which cv will be plotted/printed.
:type cv_output_unit: str, optional, default='au'
:param cv_label: the label of the cv that will be used on plots
:type cv_label: str, optional, default='CV'
:raises AssertionError: if traj_input and biasses are not of equal length
:raises AssertionError: if traj_input has an element that is not a numpy.ndarray
:raises AssertionError: if the CV grid defined by bins argument does not have a uniform spacing.
:raises ValueError: if an invalid definition for error_estimate is provided
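Example: a minimal sketch of an umbrella sampling workflow; the harmonic bias parameters, the trajectory file names and the temperature of 300 K are hypothetical and only serve as illustration::

    import numpy as np
    bins = np.linspace(0.0, 5.0, 101) #uniform grid of bin edges (atomic units)
    cv0s = np.linspace(0.25, 4.75, 10) #hypothetical umbrella centers
    biasses = [lambda cv, cv0=cv0: 0.5*0.1*(cv - cv0)**2 for cv0 in cv0s] #harmonic bias callables
    fns = ['traj_%i.dat' %i for i in range(10)] #hypothetical files with columns time, CV
    hist = Histogram1D.from_wham(bins, fns, biasses, 300.0, error_estimate='mle_f')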
'''
timings = {}
timings['start'] = time.time()
if verbosity.lower() in ['medium', 'high']:
print('Initialization ...')
#checks and initialization
assert len(biasses)==len(traj_input), 'The arguments traj_input and biasses should be of the same length.'
beta = 1/(boltzmann*temp)
Nsims = len(biasses)
#Preprocess trajectory argument: load files if file names are given instead of raw data, determine and store the number of simulation steps in each simulation:
if verbosity.lower() in ['high']:
print(' processing trajectories')
Nis = np.zeros(len(traj_input), dtype=int)
trajectories = np.empty(len(traj_input), dtype=object)
for i, trajectory in enumerate(traj_input):
if isinstance(trajectory, str):
trajectory = np.loadtxt(trajectory)[:,1] #first column is the time, second column is the CV
else:
assert isinstance(trajectory, np.ndarray), 'All trajectories should be numpy.ndarrays'
Nis[i] = len(trajectory)
trajectories[i] = trajectory
#Preprocess the bins argument and redefine it to represent the bin_edges. We need to do this beforehand to make sure that when calling the numpy.histogram routine with this bins argument, each histogram will have a consistent bin_edges array and hence consistent histogram.
if verbosity.lower() in ['high']:
print(' processing bins')
if isinstance(bins, int):
raise NotImplementedError('An integer number of bins is not yet supported, provide an array of bin edges instead.')
bin_centers = 0.5*(bins[:-1]+bins[1:])
deltas = bins[1:]-bins[:-1]
grid_non_uniformity = deltas.std()/deltas.mean()
assert grid_non_uniformity<1e-6, 'CV grid defined by bins argument should be of uniform spacing!'
delta = deltas.mean()
Ngrid = len(bin_centers)
timings['init'] = time.time()
#generate the individual histograms using numpy.histogram
if verbosity.lower() in ['medium', 'high']:
print('Constructing individual histograms for each biased simulation ...')
Hs = wham1d_hs(Nsims, Ngrid, trajectories, bins, Nis)
timings['hist'] = time.time()
#compute the integrated boltzmann factors of the biases in each grid interval
if verbosity.lower() in ['medium', 'high']:
print('Computing bias on grid ...')
bs = wham1d_bias(Nsims, Ngrid, beta, biasses, delta, bias_subgrid_num, bin_centers, threshold=bias_thress)
timings['bias'] = time.time()
#some init printing
if verbosity.lower() in ['high']:
print()
print('---------------------------------------------------------------------')
print('WHAM SETUP')
print(' number of simulations = ', Nsims)
for i, Ni in enumerate(Nis):
print(' simulation %i has %i steps' %(i,Ni))
print(' CV grid [%s]: start = %.3f end = %.3f delta = %.3f N = %i' %(cv_output_unit, bins.min()/parse_unit(cv_output_unit), bins.max()/parse_unit(cv_output_unit), delta/parse_unit(cv_output_unit), Ngrid))
print('---------------------------------------------------------------------')
print()
if verbosity.lower() in ['medium', 'high']:
print('Solving WHAM equations (SCF loop) ...')
ps, fs, converged = wham1d_scf(Nis, Hs, bs, Nscf=Nscf, convergence=convergence, verbose=verbosity.lower() in ['high'])
if verbosity.lower() in ['low', 'medium', 'high']:
if bool(converged):
print('SCF Converged!')
else:
print('SCF did not converge!')
timings['scf'] = time.time()
error = None
if error_estimate is not None and error_estimate in ['mle_p', 'mle_p_cov', 'mle_f', 'mle_f_cov']:
if verbosity.lower() in ['medium', 'high']:
print('Estimating error ...')
if corrtimes is None: corrtimes = np.ones(len(traj_input), float)
error = wham1d_error(Nsims, Ngrid, Nis, ps, fs, bs, corrtimes, method=error_estimate, verbosity=verbosity, p_threshold=error_p_threshold)
elif error_estimate is not None and error_estimate not in ["None"]:
raise ValueError('Received invalid value for keyword argument error_estimate, got %s. See documentation for valid choices.' %error_estimate)
timings['error'] = time.time()
if verbosity.lower() in ['low', 'medium', 'high']:
print('---------------------------------------------------------------------')
print('TIMING SUMMARY')
t = timings['init'] - timings['start']
print(' initializing: %s' %(time.strftime('%Hh %Mm %S.{:03d}s'.format(int((t%1)*1000)), time.gmtime(t))))
t = timings['hist'] - timings['init']
print(' histograms : %s' %(time.strftime('%Hh %Mm %S.{:03d}s'.format(int((t%1)*1000)), time.gmtime(t))))
t = timings['bias'] - timings['hist']
print(' bias poten. : %s' %(time.strftime('%Hh %Mm %S.{:03d}s'.format(int((t%1)*1000)), time.gmtime(t))))
t = timings['scf'] - timings['bias']
print(' solve scf : %s' %(time.strftime('%Hh %Mm %S.{:03d}s'.format(int((t%1)*1000)), time.gmtime(t))))
t = timings['error'] - timings['scf']
print(' error est. : %s' %(time.strftime('%Hh %Mm %S.{:03d}s'.format(int((t%1)*1000)), time.gmtime(t))))
t = timings['error'] - timings['init']
print(' TOTAL : %s' %(time.strftime('%Hh %Mm %S.{:03d}s'.format(int((t%1)*1000)), time.gmtime(t))))
print('---------------------------------------------------------------------')
return cls(bin_centers.copy(), ps, error=error, cv_output_unit=cv_output_unit, cv_label=cv_label)
@classmethod
def from_wham_c(cls, bins, trajectories, biasses, temp, error_estimate=None, bias_subgrid_num=20, Nscf=1000, convergence=1e-6, cv_output_unit='au', cv_label='CV', verbosity='low'):
'''
.. deprecated:: v1.7
This routine's sole purpose is backward compatibility: it is an alias for from_wham. Please switch to the from_wham routine, as this routine will be removed in the near future.
'''
return cls.from_wham(bins, trajectories, biasses, temp, error_estimate=error_estimate, bias_subgrid_num=bias_subgrid_num, Nscf=Nscf, convergence=convergence, cv_output_unit=cv_output_unit, cv_label=cv_label, verbosity=verbosity)
@classmethod
def from_fep(cls, fep, temp):
'''
Compute a probability histogram from the given free energy profile at the given temperature.
:param fep: free energy profile from which the probability histogram is computed
:type fep: fep.BaseFreeEnergyProfile/fep.SimpleFreeEnergyProfile
:param temp: the temperature at which the histogram input data was simulated
:type temp: float
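Example: a minimal sketch, assuming ``fep`` is an existing free energy profile instance and the simulations were run at 300 K::

    hist = Histogram1D.from_fep(fep, 300.0)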
'''
ps = np.zeros(len(fep.fs))
beta = 1.0/(boltzmann*temp)
ps[~np.isnan(fep.fs)] = np.exp(-beta*fep.fs[~np.isnan(fep.fs)] )
norm = ps.sum()
ps /= norm
error = None
if fep.error is not None:
means = -beta*fep.error.means-np.log(norm) #last term is to assure normalization consistent with that of ps attribute
stds = beta*fep.error.stds
error = LogGaussianDistribution(means, stds)
return cls(fep.cvs, ps, error=error, cv_output_unit=fep.cv_output_unit, cv_label=fep.cv_label)
def plot(self, fn=None, obss=['value'], linestyles=None, linewidths=None, colors=None, cvlims=None, plims=None, show_legend=False, **plot_kwargs):
'''
Plot the probability histogram as function of the CV. If an error distribution is stored in self.error, various statistical quantities besides the estimated mean probability itself (such as the error width, lower/upper limit of the error bar, or a random sample) can be plotted using the ``obss`` keyword. You can specify additional matplotlib keyword arguments that will be passed to the matplotlib plotter (`plot` and/or `fill_between`) at the end of the argument list of this routine.
:param fn: name of a file to save plot to. If None, the plot will not be saved to a file.
:type fn: str, optional, default=None
:param obss: Specify which statistical property/properties to plot. Multiple values are allowed, which will be plotted on the same figure. Following options are supported:
- **value** - the values stored in self.ps
- **mean** - the mean according to the error distribution, i.e. self.error.mean()
- **lower** - the lower limit of the 2-sigma error bar (which corresponds to a 95% confidence interval in case of a normal distribution), i.e. self.error.nsigma_conf_int(2)[0]
- **upper** - the upper limit of the 2-sigma error bar (which corresponds to a 95% confidence interval in case of a normal distribution), i.e. self.error.nsigma_conf_int(2)[1]
- **error** - half the width of the 2-sigma error bar (which corresponds to a 95% confidence interval in case of a normal distribution), i.e. abs(upper-lower)/2
- **sample** - a random sample taken from the error distribution, i.e. self.error.sample()
:type obss: list, optional, default=['value']
:param linestyles: Specify the line style (using matplotlib definitions) for each quantity requested in ``obss``. If None, all linestyles are set to '-'
:type linestyles: list or None, optional, default=None
:param linewidths: Specify the line width (using matplotlib definitions) for each quantity requested in ``obss``. If None, all linewidths are set to 1.
:type linewidths: list of floats or None, optional, default=None
:param colors: Specify the color (using matplotlib definitions) for each quantity requested in ``obss``. If None, matplotlib will choose.
:type colors: list of strings or None, optional, default=None
:param cvlims: limits of the plotting range of the CV. If None, no limits are enforced
:type cvlims: list of two floats or None, optional, default=None
:param plims: limits of the plotting range of the probability. If None, no limits are enforced
:type plims: list of two floats or None, optional, default=None
:param show_legend: If True, the legend is shown in the plot
:type show_legend: bool, optional, default=False
:raises ValueError: if the ``obss`` argument contains an invalid specification
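Example: a minimal sketch plotting the histogram values together with the limits of the 2-sigma error bar; it assumes ``hist`` is an instance with an error distribution attached::

    hist.plot(fn='hist.png', obss=['value', 'lower', 'upper'], colors=['k', 'r', 'r'], show_legend=True)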
'''
#preprocess
cvunit = parse_unit(self.cv_output_unit)
punit = 1.0/cvunit
#read data
data = []
labels = []
for obs in obss:
values = None
if obs.lower() in ['value']:
values = self.ps.copy()
elif obs.lower() in ['error-mean', 'mean']:
assert self.error is not None, 'Observable %s can only be plotted if error is defined!' %obs
values = self.error.mean()
elif obs.lower() in ['error-lower', 'lower']:
assert self.error is not None, 'Observable %s can only be plotted if error is defined!' %obs
values = self.error.nsigma_conf_int(2)[0]
elif obs.lower() in ['error-upper', 'upper']:
assert self.error is not None, 'Observable %s can only be plotted if error is defined!' %obs
values = self.error.nsigma_conf_int(2)[1]
elif obs.lower() in ['error-half-upper-lower', 'error', 'width']: #accept 'error' as documented alias
assert self.error is not None, 'Observable %s can only be plotted if error is defined!' %obs
lower = self.error.nsigma_conf_int(2)[0]
upper = self.error.nsigma_conf_int(2)[1]
values = 0.5*np.abs(upper - lower)
elif obs.lower() in ['error-sample', 'sample']:
assert self.error is not None, 'Observable %s can only be plotted if error is defined!' %obs
values = self.error.sample()
if values is None: raise ValueError('Could not interpret observable %s' %obs)
data.append(values)
labels.append(obs)
if self.error is not None:
lower, upper = self.error.nsigma_conf_int(2)
if linestyles is None:
linestyles = [None,]*len(data)
if linewidths is None:
linewidths = [1.0,]*len(data)
if colors is None:
colors = [None,]*len(data)
#make plot
pp.clf()
for (ys,label,linestyle,linewidth,color) in zip(data,labels,linestyles, linewidths,colors):
pp.plot(self.cvs/cvunit, ys/punit, label=label, linestyle=linestyle, linewidth=linewidth, color=color, **plot_kwargs)
if self.error is not None:
pp.fill_between(self.cvs/cvunit, lower/punit, upper/punit, **plot_kwargs, alpha=0.33)
pp.xlabel('%s [%s]' %(self.cv_label, self.cv_output_unit), fontsize=16)
pp.ylabel('Probability density [1/%s]' %(self.cv_output_unit), fontsize=16)
if cvlims is not None: pp.xlim(cvlims)
if plims is not None: pp.ylim(plims)
if show_legend:
pp.legend(loc='best')
fig = pp.gcf()
fig.set_size_inches([8,8])
fig.tight_layout()
if fn is not None:
pp.savefig(fn)
else:
pp.show()
return
class Histogram2D(object):
'''
Class representing a 2D probability histogram :math:`p(CV1,CV2)` in terms of two collective variables :math:`CV1` and :math:`CV2`.
'''
def __init__(self, cv1s, cv2s, ps, error=None, cv1_output_unit='au', cv2_output_unit='au', cv1_label='CV1', cv2_label='CV2'):
'''
:param cv1s: the bin values of the first collective variable CV1, which should be in atomic units!
:type cv1s: np.ndarray
:param cv2s: the bin values of the second collective variable CV2, which should be in atomic units!
:type cv2s: np.ndarray
:param ps: 2D array corresponding to the histogram probability values at the given CV1,CV2 grid. The probabilities should be in atomic units!
:type ps: np.ndarray
:param error: error distribution on the free energy profile
:type error: child of :py:class:`Distribution <thermolib.error.Distribution>`, optional, default=None
:param cv1_output_unit: the units for printing and plotting of CV1 values (not the unit of the input array, that is assumed to be in atomic units).
:type cv1_output_unit: str, optional, default='au'
:param cv2_output_unit: the units for printing and plotting of CV2 values (not the unit of the input array, that is assumed to be in atomic units).
:type cv2_output_unit: str, optional, default='au'
:param cv1_label: label for CV1 for printing and plotting
:type cv1_label: str, optional, default='CV1'
:param cv2_label: label for CV2 for printing and plotting
:type cv2_label: str, optional, default='CV2'
'''
self.cv1s = cv1s.copy()
self.cv2s = cv2s.copy()
self.ps = ps.copy()
self.error = None
if error is not None:
self.error = error.copy()
self.cv1_output_unit = cv1_output_unit
self.cv2_output_unit = cv2_output_unit
self.cv1_label = cv1_label
self.cv2_label = cv2_label
def plower(self, nsigma=2):
'''
Return the lower limit of an n-sigma error bar on the histogram probability, i.e. :math:`\\mu - n\\sigma` with :math:`\\mu` the mean and :math:`\\sigma` the standard deviation.
:param nsigma: defines the n-sigma error bar
:type nsigma: int, optional, default=2
:return: the lower limit of the n-sigma error bar
:rtype: np.ndarray with dimensions determined by self.error
:raises AssertionError: if self.error is not defined.
'''
assert self.error is not None, 'Plower cannot be computed because no error distribution was defined in the error attribute'
plower, pupper = self.error.nsigma_conf_int(nsigma)
plower /= plower.sum()
return plower
def pupper(self, nsigma=2):
'''
Return the upper limit of an n-sigma error bar on the histogram probability, i.e. :math:`\\mu + n\\sigma` with :math:`\\mu` the mean and :math:`\\sigma` the standard deviation.
:param nsigma: defines the n-sigma error bar
:type nsigma: int, optional, default=2
:return: the upper limit of the n-sigma error bar
:rtype: np.ndarray with dimensions determined by self.error
:raises AssertionError: if self.error is not defined.
'''
assert self.error is not None, 'Pupper cannot be computed because no error distribution was defined in the error attribute'
plower, pupper = self.error.nsigma_conf_int(nsigma)
pupper /= pupper.sum()
return pupper
def copy(self):
'''
Make and return a copy of the current 2D probability histogram.
'''
error = None
if self.error is not None:
error = self.error.copy()
return Histogram2D(
self.cv1s.copy(), self.cv2s.copy(), self.ps.copy(), error=error,
cv1_output_unit=self.cv1_output_unit, cv2_output_unit=self.cv2_output_unit,
cv1_label=self.cv1_label, cv2_label=self.cv2_label
)
@classmethod
def from_average(cls, histograms, error_estimate=None, cv1_output_unit=None, cv2_output_unit=None, cv1_label=None, cv2_label=None):
'''
Start from a set of 2D histograms and compute and return the averaged histogram (and optionally the error bar).
:param histograms: set of histograms to be averaged
:type histograms: list of :py:class:`Histogram2D <thermolib.thermodynamics.histogram.Histogram2D>` instances
:param error_estimate: indicate how to perform error analysis. Currently, only one method is supported, 'std', which will compute the error bar from the standard deviation within the set of histograms.
:type error_estimate: str, optional, default=None
:param cv1_output_unit: the unit in which the new CV1 will be plotted/printed. Defaults to the cv1_output_unit of the first histogram given.
:type cv1_output_unit: str, optional
:param cv2_output_unit: the unit in which the new CV2 will be plotted/printed. Defaults to the cv2_output_unit of the first histogram given.
:type cv2_output_unit: str, optional
:param cv1_label: label for the new CV1. Defaults to the cv1_label of the first given histogram.
:type cv1_label: str, optional
:param cv2_label: label for the new CV2. Defaults to the cv2_label of the first given histogram.
:type cv2_label: str, optional
:raises AssertionError: if the histograms do not have a consistent CV1 grid
:raises AssertionError: if the histograms do not have a consistent CV2 grid
'''
#sanity checks
cv1s, cv2s = None, None
for hist in histograms:
if cv1s is None:
cv1s = hist.cv1s.copy()
else:
assert (abs(cv1s-hist.cv1s)<1e-12*np.abs(cv1s.mean())).all(), 'Cannot average the histograms as they do not have consistent CV1 grids'
if cv2s is None:
cv2s = hist.cv2s.copy()
else:
assert (abs(cv2s-hist.cv2s)<1e-12*np.abs(cv2s.mean())).all(), 'Cannot average the histograms as they do not have consistent CV2 grids'
if cv1_output_unit is None:
cv1_output_unit = hist.cv1_output_unit
if cv2_output_unit is None:
cv2_output_unit = hist.cv2_output_unit
if cv1_label is None:
cv1_label = hist.cv1_label
if cv2_label is None:
cv2_label = hist.cv2_label
#collect probability distributions
pss = np.array([hist.ps for hist in histograms])
#average histograms
ps = pss.mean(axis=0)
#compute error if requested
error = None
if error_estimate is not None and error_estimate.lower() in ['std']:
perr = pss.std(axis=0,ddof=1)
error = GaussianDistribution(ps, perr)
return cls(cv1s, cv2s, ps, error=error, cv1_output_unit=cv1_output_unit, cv2_output_unit=cv2_output_unit, cv1_label=cv1_label, cv2_label=cv2_label)
@classmethod
def from_single_trajectory(cls, data, bins, error_estimate=None, cv1_output_unit='au', cv2_output_unit='au', cv1_label='CV1', cv2_label='CV2'):
'''
Routine to estimate a 2D probability histogram in terms of two collective variables from a series of samples of those two collective variables.
:param data: array representing series of samples of the two collective variables. The first column is assumed to correspond to the first collective variable, the second column to the second CV.
:type data: np.ndarray([Nsamples,2])
:param bins: array representing the edges of the bins for which a histogram will be constructed. This argument is passed to `the numpy.histogram2d routine <https://numpy.org/doc/stable/reference/generated/numpy.histogram2d.html>`_. Hence, more information on its meaning and allowed values can be found there.
:type bins: np.ndarray
:param error_estimate: indicate if and how to perform error analysis. One of the following options is available:
- **mle_p** - Estimate the error directly for the probability of each bin in the histogram. This method does not explicitly impose the positivity of the probability.
- **mle_f** - Estimate the error for minus the logarithm of the probability, which is proportional to the free energy (hence the f in mle_f). As the probability is expressed as :math:`\propto e^{-f}`, its positivity is explicitly accounted for.
:type error_estimate: str or None, optional, default=None
:param cv1_output_unit: the unit in which CV1 will be plotted/printed (not the unit of the input array, that is assumed to be atomic units).
:type cv1_output_unit: str, optional, default='au'
:param cv2_output_unit: the unit in which CV2 will be plotted/printed (not the unit of the input array, that is assumed to be atomic units).
:type cv2_output_unit: str, optional, default='au'
:param cv1_label: label of CV1 that will be used for plotting and printing
:type cv1_label: str, optional, default='CV1'
:param cv2_label: label of CV2 that will be used for plotting and printing
:type cv2_label: str, optional, default='CV2'
:raises ValueError: if no valid error_estimate value is given
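Example: a minimal sketch with synthetic samples; ``data`` contains one row per sample and one column per CV::

    import numpy as np
    data = np.random.normal(0.0, 1.0, (10000, 2)) #synthetic (CV1,CV2) samples
    bins = [np.linspace(-4.0, 4.0, 41), np.linspace(-4.0, 4.0, 41)]
    hist = Histogram2D.from_single_trajectory(data, bins, error_estimate='mle_p')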
'''
#initialization
cv1s, cv2s = None, None
ps = None
Ntot = len(data)
#generate histogram using numpy.histogram routine
ns, cv1_edges, cv2_edges = np.histogram2d(data[:,0], data[:,1], bins=bins, density=False) #first column is CV1, second column is CV2
cv1s = 0.5*(cv1_edges[:-1]+cv1_edges[1:]) # bin centers for CV1
cv2s = 0.5*(cv2_edges[:-1]+cv2_edges[1:]) # bin centers for CV2
ps = ns/Ntot
#estimate of upper and lower boundary of n-sigma confidence interval
error = None
if error_estimate=='mle_p':
perrs = np.sqrt(ps*(1-ps)/Ntot)
error = GaussianDistribution(ps, perrs)
elif error_estimate=='mle_f':
#we first compute the error bar interval on f=-log(p) using the MLE-F method...
fs, ferrors = np.full(ps.shape, np.nan), np.full(ps.shape, np.nan) #initialize with NaN so that empty bins keep an undefined error
fs[ps>0] = -np.log(ps[ps>0])
ferrors[ps>0] = np.sqrt((np.exp(fs[ps>0])-1)/Ntot)
#...and then use it to define a Log-Normal distributed error on p as a Normal distributed error on log(p)=-f
error = LogGaussianDistribution(-fs, ferrors)
elif error_estimate is not None:
raise ValueError('Invalid value for error_estimate argument, received %s. Check documentation for allowed values.' %error_estimate)
return cls(cv1s, cv2s, ps, error=error, cv1_output_unit=cv1_output_unit, cv2_output_unit=cv2_output_unit, cv1_label=cv1_label, cv2_label=cv2_label)
@classmethod
def from_wham(cls, bins, traj_input, biasses, temp, pinit=None, error_estimate=None, error_p_threshold=0.0, corrtimes=None, bias_subgrid_num=20, Nscf=1000, convergence=1e-6, bias_thress=1e-3, overflow_threshold=1e-150, cv1_output_unit='au', cv2_output_unit='au', cv1_label='CV1', cv2_label='CV2', plot_biases=False, verbosity='low'):
'''
Routine that implements the Weighted Histogram Analysis Method (WHAM) for reconstructing the overall 2D probability histogram in terms of two collective variables CV1 and CV2 from a series of molecular simulations that are (possibly) biased in terms of CV1 and/or CV2.
:param bins: list of the form [bins1, bins2] where bins1 and bins2 are numpy arrays each representing the bin edges of their corresponding CV for which a histogram will be constructed. For example the following definition: [np.arange(-1,1.05,0.05), np.arange(0,5.1,0.1)] will result in a 2D histogram with bins of width 0.05 between -1 and 1 for CV1 and bins of width 0.1 between 0 and 5 for CV2.
:type bins: list of two np.ndarrays
:param traj_input: list of [CV1,CV2] trajectories, one for each simulation. This input can be generated using the :py:meth:`read_wham_input <thermolib.tools.read_wham_input>` routine. The arguments traj_input and biasses should be of the same length.
:type traj_input: list of np.ndarrays
:param biasses: list of bias potentials, one for each simulation allowing to compute the bias at given values of the collective variables CV1 and CV2 in that simulation. This input can be generated using the :py:meth:`read_wham_input <thermolib.tools.read_wham_input>` routine. The arguments traj_input and biasses should be of the same length.
:type biasses: list(callable)
:param temp: the temperature at which all simulations were performed
:type temp: float
:param pinit: initial guess for the probability density, which is assumed to be in the 'xy'-indexing convention (see the "indexing" argument and the corresponding "Notes" section in `the numpy online documentation of the meshgrid routine <https://numpy.org/doc/stable/reference/generated/numpy.meshgrid.html>`_). If None is given, a uniform distribution is used as initial guess.
:type pinit: np.ndarray, optional, default=None
:param error_estimate: indicate if and how to perform error analysis. One of the following options is available:
- **mle_p** - Estimate the error directly for the probability of each bin in the histogram. This method does not explicitly impose the positivity of the probability.
- **mle_p_cov** - Estimate the full covariance matrix for the probability of all bins in the histogram. In other words, apart from the error on the probability/free energy of a bin itself, we now also account for the covariance between the probability/free energy of different bins. This method does not explicitly impose the positivity of the probability.
- **mle_f** - Estimate the error for minus the logarithm of the probability, which is proportional to the free energy (hence the f in mle_f). As the probability is expressed as :math:`\propto e^{-f}`, its positivity is explicitly accounted for.
- **mle_f_cov** - Estimate the full covariance matrix for minus the logarithm of the probability of all bins in the histogram. In other words, apart from the error on the probability/free energy of a bin itself (with the positivity constraint explicitly imposed), we now also account for the covariance between the probability/free energy of different bins.
:type error_estimate: str or None, optional, default=None
:param error_p_threshold: only relevant when error estimation is enabled (see parameter ``error_estimate``). When ``error_p_threshold`` is set to x, bins in the histogram for which the probability resulting from the trajectory is smaller than x will be disabled for error estimation (i.e. its error will be set to np.nan). Its use is illustrated in :doc:`one of the tutorial notebooks <tut/advanced_projection>`.
:type error_p_threshold: float, optional, default=0.0
:param corrtimes: list of (integrated) correlation times of the CVs, one for each simulation. Such correlation times will be taken into account during the error estimation and hence make it more reliable. If set to None, the CV trajectories will be assumed to contain fully uncorrelated samples (which is not true when using trajectories representing each subsequent step from a molecular dynamics simulation). More information can be found in :ref:`the user guide <seclab_ug_errorestimation>`. This input can be generated using the :py:meth:`decorrelate <thermolib.tools.decorrelate>` routine. This argument needs to have the same length as the ``traj_input`` and ``biasses`` arguments.
:type corrtimes: list or np.ndarray, optional, default=None
:param bias_subgrid_num: see documentation for this argument in the :py:meth:`wham2d_bias <thermolib.ext.wham2d_bias>` routine
:type bias_subgrid_num: int or list of two ints, optional, default=20
:param Nscf: the maximum number of steps in the self-consistent loop to solve the WHAM equations
:type Nscf: int, optional, default=1000
:param convergence: convergence criterion for the WHAM self-consistent solver. The SCF loop will stop whenever the integrated absolute difference between consecutive probability densities is less than the specified value.
:type convergence: float, optional, default=1e-6
:param bias_thress: see documentation for the threshold argument in the :py:meth:`wham2d_bias <thermolib.ext.wham2d_bias>` routine
:type bias_thress: double, optional, default=1e-3
:param overflow_threshold: see documentation for this argument in the :py:meth:`wham2d_scf <thermolib.ext.wham2d_scf>` routine
:type overflow_threshold: double, optional, default=1e-150
:param cv1_output_unit: the unit in which CV1 will be plotted/printed.
:type cv1_output_unit: str, optional, default='au'
:param cv2_output_unit: the unit in which CV2 will be plotted/printed.
:type cv2_output_unit: str, optional, default='au'
:param cv1_label: label of CV1 used for plotting and printing
:type cv1_label: str, optional, default='CV1'
:param cv2_label: label of CV2 used for plotting and printing
:type cv2_label: str, optional, default='CV2'
:param plot_biases: if set to True, a 2D plot of the boltzmann factor of the bias integrated over each bin will be made. This routine (mainly) exists for debugging purposes.
:type plot_biases: bool, optional, default=False
:param verbosity: controls the level of verbosity for logging during the WHAM algorithm.
:type verbosity: str, optional, allowed='none'|'low'|'medium'|'high', default='low'
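Example: a minimal sketch for a single simulation with a hypothetical harmonic bias on CV1; the trajectory file name and the temperature of 300 K only serve as illustration::

    import numpy as np
    bins = [np.arange(-1.0, 1.05, 0.05), np.arange(0.0, 5.1, 0.1)]
    biasses = [lambda cv1, cv2: 0.5*0.1*cv1**2] #harmonic bias callable acting on CV1 only
    hist = Histogram2D.from_wham(bins, ['traj_0.dat'], biasses, 300.0)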
'''
timings = {}
timings['start'] = time.time()
if verbosity.lower() in ['medium', 'high']:
print('Initialization ...')
#checks and initialization
assert len(biasses)==len(traj_input), 'The arguments traj_input and biasses should be of the same length.'
beta = 1/(boltzmann*temp)
Nsims = len(biasses)
if isinstance(bias_subgrid_num, int):
bias_subgrid_num = [bias_subgrid_num, bias_subgrid_num]
else:
assert isinstance(bias_subgrid_num,list) or isinstance(bias_subgrid_num,np.ndarray), 'bias_subgrid_num argument should be an integer or a list of two integers'
assert len(bias_subgrid_num)==2, 'bias_subgrid_num argument should be an integer or a list of two integers'
assert isinstance(bias_subgrid_num[0],int), 'bias_subgrid_num argument should be an integer or a list of two integers'
assert isinstance(bias_subgrid_num[1],int), 'bias_subgrid_num argument should be an integer or a list of two integers'
#Preprocess trajectory argument: load files if file names are given instead of raw data, determine and store the number of simulation steps in each simulation:
if verbosity.lower() in ['high']:
print(' processing trajectories')
Nis = np.zeros(len(traj_input), dtype=int)
trajectories = np.empty(len(traj_input), dtype=object)
for i, trajectory in enumerate(traj_input):
if isinstance(trajectory, str):
trajectory = np.loadtxt(trajectory)[:,1:3] #first column is the time, second column is the CV1, third column is the CV2
else:
assert isinstance(trajectory, np.ndarray)
assert trajectory.shape[1]==2, 'Input trajectories passed as numpy.ndarray should have a second dimension of length 2, got %i' %(trajectory.shape[1])
Nis[i] = len(trajectory)
trajectories[i] = trajectory
#Preprocess the bins argument and redefine it to represent the bin_edges. We need to do this beforehand to make sure that when calling the numpy.histogram routine with this bins argument, each histogram will have a consistent bin_edges array and hence consistent histogram.
if verbosity.lower() in ['high']:
print(' processing bins')
assert isinstance(bins,list), "Bins argument should be a list of 2 numpy arrays with the corresponding bin edges. Current definition is not a list, but an object of type %s." %bins.__class__.__name__
assert len(bins)==2, "Bins argument should be a list of 2 numpy arrays with the corresponding bin edges. Current list definition does not have two elements, but %i." %(len(bins))
assert isinstance(bins[0],np.ndarray), "Bins argument should be a list of 2 numpy arrays with the corresponding bin edges. The first element in the current definition is not a numpy array but an object of type %s" %bins[0].__class__.__name__
assert isinstance(bins[1],np.ndarray), "Bins argument should be a list of 2 numpy arrays with the corresponding bin edges. The second element in the current definition is not a numpy array but an object of type %s" %bins[1].__class__.__name__
bin_centers1, bin_centers2 = 0.5*(bins[0][:-1]+bins[0][1:]), 0.5*(bins[1][:-1]+bins[1][1:])
deltas1, deltas2 = bins[0][1:]-bins[0][:-1], bins[1][1:]-bins[1][:-1]
grid_non_uniformity1, grid_non_uniformity2 = deltas1.std()/deltas1.mean(), deltas2.std()/deltas2.mean()
assert grid_non_uniformity1<1e-6, 'CV1 grid defined by bins argument should be of uniform spacing! Non-uniform grids not implemented.'
assert grid_non_uniformity2<1e-6, 'CV2 grid defined by bins argument should be of uniform spacing! Non-uniform grids not implemented.'
delta1, delta2 = deltas1.mean(), deltas2.mean()
Ngrid1, Ngrid2 = len(bin_centers1), len(bin_centers2)
Ngrid = Ngrid1*Ngrid2
timings['init'] = time.time()
#generate the individual histograms using numpy.histogram
if verbosity.lower() in ['medium', 'high']:
print('Constructing individual histograms for each biased simulation ...')
Hs = wham2d_hs(Nsims, Ngrid1, Ngrid2, trajectories, bins[0], bins[1], Nis)
timings['hist'] = time.time()
#compute the boltzmann factors of the biases in each grid interval
if verbosity.lower() in ['medium', 'high']:
print('Computing bias on grid ...')
bs = wham2d_bias(Nsims, Ngrid1, Ngrid2, beta, biasses, delta1, delta2, bias_subgrid_num[0], bias_subgrid_num[1], bin_centers1, bin_centers2, threshold=bias_thress)
if plot_biases:
for i, bias in enumerate(biasses):
bias.plot('bias_%i.png' %i, bin_centers1, bin_centers2)
timings['bias'] = time.time()
#some init printing
if verbosity.lower() in ['high']:
print()
print('---------------------------------------------------------------------')
print('WHAM SETUP')
print(' number of simulations = ', Nsims)
for i, Ni in enumerate(Nis):
print(' simulation %i has %i steps' %(i,Ni))
print(' CV1 grid [%s]: start = %.3f end = %.3f delta = %.3f N = %i' %(cv1_output_unit, bins[0].min()/parse_unit(cv1_output_unit), bins[0].max()/parse_unit(cv1_output_unit), delta1/parse_unit(cv1_output_unit), Ngrid1))
print(' CV2 grid [%s]: start = %.3f end = %.3f delta = %.3f N = %i' %(cv2_output_unit, bins[1].min()/parse_unit(cv2_output_unit), bins[1].max()/parse_unit(cv2_output_unit), delta2/parse_unit(cv2_output_unit), Ngrid2))
print('---------------------------------------------------------------------')
print()
if verbosity.lower() in ['medium', 'high']:
print('Solving WHAM equations (SCF loop) ...')
#self consistent loop to solve the WHAM equations
if pinit is None:
pinit = np.ones([Ngrid1,Ngrid2])/Ngrid
else:
#it is assumed pinit is given in 'xy'-indexing convention (such as for example when using the ps attribute of another Histogram2D instance)
assert pinit.shape[0]==Ngrid2, 'Specified initial guess should be of shape (%i,%i), got (%i,%i)' %(Ngrid2,Ngrid1,pinit.shape[0],pinit.shape[1])
assert pinit.shape[1]==Ngrid1, 'Specified initial guess should be of shape (%i,%i), got (%i,%i)' %(Ngrid2,Ngrid1,pinit.shape[0],pinit.shape[1])
#however, this routine is written in the 'ij'-indexing convention, therefore, we transpose pinit here.
pinit = pinit.T
pinit /= pinit.sum()
ps, fs, converged = wham2d_scf(Nis, Hs, bs, pinit, Nscf=Nscf, convergence=convergence, verbose=verbosity.lower() in ['high'], overflow_threshold=overflow_threshold)
if verbosity.lower() in ['low', 'medium', 'high']:
if bool(converged):
print(' SCF Converged!')
else:
print(' SCF did not converge!')
timings['scf'] = time.time()
#error estimation
error = None
if error_estimate is not None:
if verbosity.lower() in ['medium', 'high']:
print('Estimating error ...')
if corrtimes is None: corrtimes = np.ones(len(traj_input), float)
error = wham2d_error(ps, fs, bs, Nis, corrtimes, method=error_estimate, p_threshold=error_p_threshold, verbosity=verbosity)
timings['error'] = time.time()
if verbosity.lower() in ['low', 'medium', 'high']:
print()
print('---------------------------------------------------------------------')
print('TIMING SUMMARY')
t = timings['init'] - timings['start']
print(' initializing: %s' %(time.strftime('%Hh %Mm %S.{:03d}s'.format(int((t%1)*1000)), time.gmtime(t))))
t = timings['hist'] - timings['init']
print(' histograms : %s' %(time.strftime('%Hh %Mm %S.{:03d}s'.format(int((t%1)*1000)), time.gmtime(t))))
t = timings['bias'] - timings['hist']
print(' bias poten. : %s' %(time.strftime('%Hh %Mm %S.{:03d}s'.format(int((t%1)*1000)), time.gmtime(t))))
t = timings['scf'] - timings['bias']
print(' solve scf : %s' %(time.strftime('%Hh %Mm %S.{:03d}s'.format(int((t%1)*1000)), time.gmtime(t))))
t = timings['error'] - timings['scf']
print(' error est. : %s' %(time.strftime('%Hh %Mm %S.{:03d}s'.format(int((t%1)*1000)), time.gmtime(t))))
t = timings['error'] - timings['init']
print(' TOTAL : %s' %(time.strftime('%Hh %Mm %S.{:03d}s'.format(int((t%1)*1000)), time.gmtime(t))))
print('---------------------------------------------------------------------')
return cls(bin_centers1.copy(), bin_centers2.copy(), ps, error=error, cv1_output_unit=cv1_output_unit, cv2_output_unit=cv2_output_unit, cv1_label=cv1_label, cv2_label=cv2_label)
@classmethod
def from_wham_c(cls, bins, trajectories, biasses, temp, pinit=None, error_estimate=None, bias_subgrid_num=20, Nscf=1000, convergence=1e-6, cv1_output_unit='au', cv2_output_unit='au', cv1_label='CV1', cv2_label='CV2', plot_biases=False, verbose=None, verbosity='low'):
'''
.. deprecated:: v1.7
This routine's sole purpose is backward compatibility: it is an alias for from_wham. Please switch to the from_wham routine, as this routine will be removed in the near future.
'''
if verbose is not None: verbosity = 'high' if verbose else 'low' #map the deprecated verbose flag onto the verbosity keyword accepted by from_wham
return cls.from_wham(bins, trajectories, biasses, temp, pinit=pinit, error_estimate=error_estimate, bias_subgrid_num=bias_subgrid_num, Nscf=Nscf, convergence=convergence, cv1_output_unit=cv1_output_unit, cv2_output_unit=cv2_output_unit, cv1_label=cv1_label, cv2_label=cv2_label, plot_biases=plot_biases, verbosity=verbosity)
@classmethod
def from_fes(cls, fes, temp):
'''
Compute a probability histogram from the given free energy surface at the given temperature.
:param fes: free energy surface from which the probability histogram is computed
:type fes: fep.FreeEnergySurface2D
:param temp: the temperature at which the histogram input data was simulated
:type temp: float
'''
assert isinstance(fes, FreeEnergySurface2D), 'Input argument fes should be type FreeEnergySurface2D'
beta = 1.0/(boltzmann*temp)
ps = np.exp(-beta*fes.fs)
ps /= ps[~np.isnan(ps)].sum()
error = None
if fes.error is not None:
raise NotImplementedError #TODO
return cls(fes.cv1s, fes.cv2s, ps, error=error, cv1_output_unit=fes.cv1_output_unit, cv2_output_unit=fes.cv2_output_unit, cv1_label=fes.cv1_label, cv2_label=fes.cv2_label)
def average_cv_constraint_other(self, index, target_distribution=MultiGaussianDistribution, propagator=Propagator()):
'''
Routine to compute a profile representing the average of one CV (denoted as y/Y below, y for integration values and Y for resulting averaged values) as function of the other CV (denoted as x below), i.e. the other CV is constrained to one of its bin values x. This is done using the following formula:
.. math::
Y(x) = \\frac{\\int y\\cdot p(x,y) dy}{\\int p(x,y)dy}
:param index: the index of the CV which will be averaged (the other is then constrained). If index=1, then y=CV1 and x=CV2, while if index=2, then y=CV2 and x=CV1.
:type index: int (1 or 2)
:param target_distribution: model for the error distribution of the resulting profile.
:type target_distribution: child instance of :py:class:`Distribution <thermolib.error.Distribution>`, optional, default= :py:class:`MultiGaussianDistribution <thermolib.error.MultiGaussianDistribution>`
:param propagator: a Propagator used for error propagation. Can be useful if one wants to adjust the error propagation settings (such as the number of random samples taken)
:type propagator: instance of :py:class:`Propagator <thermolib.error.Propagator>`, optional, default=Propagator()
:return: Profile of the average
:rtype: :py:class:`BaseProfile <thermolib.thermodynamics.fep.BaseProfile>`
:raises ValueError: if index is not 1 or 2
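Example: a minimal sketch computing the average of CV2 at each fixed CV1 bin value, assuming ``hist`` is an existing Histogram2D instance::

    profile = hist.average_cv_constraint_other(2) #returns a BaseProfile of the averaged CV2 as function of CV1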
'''
#set x,y and p arrays according to chosen index
if index==1:
ys = self.cv1s
xs = self.cv2s
cv_output_unit = self.cv2_output_unit
f_output_unit = self.cv1_output_unit
cv_label = self.cv2_label
f_label = self.cv1_label
def function(ps_inp):
ps = ps_inp/ps_inp[~np.isnan(ps_inp)].sum()
Ys = np.zeros(len(xs))
for j in range(len(xs)):
mask = ~np.isnan(ps[:,j])
T = (ys[mask]*ps[mask,j]).sum()
N = (ps[mask,j]).sum()
try:
Ys[j] = T/N
except (FloatingPointError, ZeroDivisionError):
Ys[j] = np.nan
return Ys
elif index==2:
ys = self.cv2s
xs = self.cv1s
cv_output_unit = self.cv1_output_unit
f_output_unit = self.cv2_output_unit
cv_label = self.cv1_label
f_label = self.cv2_label
def function(ps_inp):
ps = ps_inp/ps_inp[~np.isnan(ps_inp)].sum()
Ys = np.zeros(len(xs))
for j in range(len(xs)):
mask = ~np.isnan(ps[j,:])
T = (ys[mask]*ps[j,mask]).sum()
N = (ps[j,mask]).sum()
try:
Ys[j] = T/N
except (FloatingPointError,ZeroDivisionError):
Ys[j] = np.nan
return Ys
else:
raise ValueError('Index should be 1 or 2 for 2D Histogram')
#compute average Y (and possibly its error) on grid of x
if self.error is not None:
error = propagator(function, self.error, target_distribution=target_distribution, samples_are_flattened=True)
Ys = error.mean()
else:
Ys = function(self.ps)
error = None
return BaseProfile(xs, Ys, error=error, cv_output_unit=cv_output_unit, f_output_unit=f_output_unit, cv_label=cv_label, f_label=f_label)
def plot(self, fn=None, slicer=[slice(None),slice(None)], obss=['value'], linestyles=None, linewidths=None, colors=None, cv1_lims=None, cv2_lims=None, plims=None, ncolors=8, ref_max=False, **plot_kwargs):
'''
Make either a 2D contour plot of p(CV1,CV2) or a 1D plot of the probability along a slice in the direction specified by the slicer argument. Apart from the value of the probability itself, other (statistical) related properties can be plotted as defined in the ``obss`` argument. At the end of the argument list, you can also specify any matplotlib keyword arguments you wish to pass to the matplotlib plotter. E.g. if you want to specify the colormap, you can just add ``cmap='rainbow'`` at the end of the arguments.
:param fn: name of a file to save plot to. If None, the plot will not be saved to a file.
:type fn: str, optional, default=None
:param slicer: determines which degrees of freedom (CV1/CV2) vary/stay fixed in the plot. If slice(None) is specified, the probability will be plotted as function of the corresponding CV. If an integer `i` is specified, the corresponding CV will be kept fixed at its `i-th` value. Some examples:
- [slice(None),slice(None)] -- a 2D contour plot will be made of the probability as function of both CVs
- [slice(None),10] -- a 1D plot will be made of the probability as function of CV1 with CV2 fixed at self.cv2s[10]
- [23,slice(None)] -- a 1D plot will be made of the probability as function of CV2 with CV1 fixed at self.cv1s[23]
:type slicer: list of `slices <https://www.w3schools.com/python/ref_func_slice.asp>`_ or integers, optional, default=[slice(None),slice(None)]
:param obss: Specify which statistical property/properties to plot. Multiple values are allowed, which will be plotted on the same figure. Following options are supported:
- **value** - the values stored in self.ps
- **mean** - the mean according to the error distribution, i.e. self.error.mean()
- **lower** - the lower limit of the 2-sigma error bar (which corresponds to a 95% confidence interval in case of a normal distribution), i.e. self.error.nsigma_conf_int(2)[0]
- **upper** - the upper limit of the 2-sigma error bar (which corresponds to a 95% confidence interval in case of a normal distribution), i.e. self.error.nsigma_conf_int(2)[1]
- **width** - half the width of the 2-sigma error bar (which corresponds to a 95% confidence interval in case of a normal distribution), i.e. abs(upper-lower)/2
- **sample** - a random sample taken from the error distribution, i.e. self.error.sample()
:type obss: list, optional, default=['value']
:param linestyles: Specify the line style (using matplotlib definitions) for each quantity requested in ``obss``. If None, matplotlib will choose.
:type linestyles: list or None, optional, default=None
:param linewidths: Specify the line width (using matplotlib definitions) for each quantity requested in ``obss``. If None, matplotlib will choose.
:type linewidths: list of floats or None, optional, default=None
:param colors: Specify the color (using matplotlib definitions) for each quantity requested in ``obss``. If None, matplotlib will choose.
:type colors: list of strings or None, optional, default=None
:param cv1_lims: limits to the plotting range of CV1. If None, no limits are enforced
:type cv1_lims: list of floats or None, optional, default=None
:param cv2_lims: limits to the plotting range of CV2. If None, no limits are enforced
:type cv2_lims: list of floats or None, optional, default=None
:param plims: limits to the plotting range of the probability. If None, no limits are enforced
:type plims: list of floats or None, optional, default=None
:param ncolors: only relevant for 2D contour plot, represents the number of contours (and hence colors) to be used in plot.
:type ncolors: int, optional, default=8
:param ref_max: if True, the probability histogram(s) will be rescaled such that the maximum probability is 1.0
:type ref_max: bool, optional, default=False
'''
#preprocess
assert isinstance(slicer, list) or isinstance(slicer, np.ndarray), 'Slicer should be list or array, instead got %s' %(slicer.__class__.__name__)
assert len(slicer)==2, 'Slicer should be list of length 2, instead got list of length %i' %( len(slicer))
if isinstance(slicer[0], slice) and isinstance(slicer[1], slice):
ndim = 2
xs = self.cv1s[slicer[0]]/parse_unit(self.cv1_output_unit)
xlabel = '%s [%s]' %(self.cv1_label, self.cv1_output_unit)
xlims = cv1_lims
ys = self.cv2s[slicer[1]]/parse_unit(self.cv2_output_unit)
ylabel = '%s [%s]' %(self.cv2_label, self.cv2_output_unit)
ylims = cv2_lims
title = 'p(:,:)'
elif isinstance(slicer[0], slice) or isinstance(slicer[1],slice):
ndim = 1
if isinstance(slicer[0], slice):
xs = self.cv1s[slicer[0]]/parse_unit(self.cv1_output_unit)
xlabel = '%s [%s]' %(self.cv1_label, self.cv1_output_unit)
xlims = cv1_lims
title = 'p(:,%s[%i]=%.3f %s)' %(self.cv2_label,slicer[1],self.cv2s[slicer[1]]/parse_unit(self.cv2_output_unit),self.cv2_output_unit)
else:
xs = self.cv2s[slicer[1]]/parse_unit(self.cv2_output_unit)
xlabel = '%s [%s]' %(self.cv2_label, self.cv2_output_unit)
xlims = cv2_lims
title = 'p(%s[%i]=%.3f %s,:)' %(self.cv1_label,slicer[0],self.cv1s[slicer[0]]/parse_unit(self.cv1_output_unit),self.cv1_output_unit)
else:
raise ValueError('At least one of the two elements in slicer should be a slice instance!')
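#p(CV1,CV2) is a probability density that integrates to 1, so its natural
#unit is the inverse of the product of the two CV units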
punit = 1.0/(parse_unit(self.cv1_output_unit)*parse_unit(self.cv2_output_unit))
#read data
data = []
labels = []
for obs in obss:
values = None
if obs.lower() in ['value']:
values = self.ps.copy()
elif obs.lower() in ['error-mean', 'mean']:
assert self.error is not None, 'Observable %s can only be plotted if error is defined!' %obs
values = self.error.mean()
elif obs.lower() in ['error-lower', 'lower']:
assert self.error is not None, 'Observable %s can only be plotted if error is defined!' %obs
values = self.error.nsigma_conf_int(2)[0]
elif obs.lower() in ['error-upper', 'upper']:
assert self.error is not None, 'Observable %s can only be plotted if error is defined!' %obs
values = self.error.nsigma_conf_int(2)[1]
elif obs.lower() in ['error-half-upper-lower', 'width']:
assert self.error is not None, 'Observable %s can only be plotted if error is defined!' %obs
lower = self.error.nsigma_conf_int(2)[0]
upper = self.error.nsigma_conf_int(2)[1]
values = 0.5*np.abs(upper - lower)
elif obs.lower() in ['error-sample', 'sample']:
assert self.error is not None, 'Observable %s can only be plotted if error is defined!' %obs
values = self.error.sample()
if values is None: raise ValueError('Could not interpret observable %s' %obs)
if ref_max:
values /= np.nanmax(values) #rescale so the maximum probability is 1.0
data.append(values)
labels.append(obs)
if ndim==1 and self.error is not None:
lower, upper = self.error.nsigma_conf_int(2)
lower, upper = lower[tuple(slicer)], upper[tuple(slicer)] #numpy requires a tuple (not a list) for mixed slice/integer indexing
if linestyles is None:
linestyles = [None,]*len(data)
if linewidths is None:
linewidths = [1.0,]*len(data)
if colors is None:
colors = [None,]*len(data)
#make plot
pp.clf()
if ndim==1:
for (zs,label,linestyle,linewidth,color) in zip(data,labels,linestyles, linewidths,colors):
pp.plot(xs, zs[tuple(slicer)]/punit, label=label, linestyle=linestyle, linewidth=linewidth, color=color, **plot_kwargs)
if self.error is not None:
pp.fill_between(xs, lower/punit, upper/punit, **plot_kwargs, alpha=0.33)
pp.xlabel(xlabel, fontsize=16)
pp.ylabel('Probability density [1/(%s*%s)]' %(self.cv1_output_unit, self.cv2_output_unit), fontsize=16)
pp.title(title, fontsize=16)
if xlims is not None: pp.xlim(xlims)
if plims is not None: pp.ylim(plims)
pp.legend(loc='best')
fig = pp.gcf()
fig.set_size_inches([8,8])
elif ndim==2:
if len(data)<4:
fig, axs = pp.subplots(1, len(data), squeeze=False)
size = [8*len(data),8]
elif len(data)==4:
fig, axs = pp.subplots(2,2)
size = [16,16]
else:
nrows = int(np.ceil(len(data)/3))
fig, axs = pp.subplots(nrows,3)
size = [24,8*nrows]
for i, (multi_index, ax) in enumerate(np.ndenumerate(axs)):
if i>=len(data):
#hide axes left unused when len(data) does not fill the subplot grid
ax.axis('off')
continue
if plims is not None:
delta = (plims[1]-plims[0])/ncolors
levels = np.arange(plims[0], plims[1]+delta, delta)
contourf = ax.contourf(xs, ys, data[i].T/punit, levels=levels, **plot_kwargs) #transpose data to convert ij indexing (internal) to xy indexing (for plotting)
contour = ax.contour(xs, ys, data[i].T/punit, levels=levels, colors='gray') #transpose data to convert ij indexing (internal) to xy indexing (for plotting)
else:
contourf = ax.contourf(xs, ys, data[i].T/punit, **plot_kwargs) #transpose data to convert ij indexing (internal) to xy indexing (for plotting)
contour = ax.contour(xs, ys, data[i].T/punit, colors='gray') #transpose data to convert ij indexing (internal) to xy indexing (for plotting)
ax.set_xlabel(xlabel, fontsize=16)
ax.set_ylabel(ylabel, fontsize=16)
ax.set_title('%s of %s' %(labels[i],title), fontsize=16)
if xlims is not None: ax.set_xlim(xlims)
if ylims is not None: ax.set_ylim(ylims)
cbar = pp.colorbar(contourf, ax=ax, extend='both')
cbar.set_label('Probability density [1/(%s*%s)]' %(self.cv1_output_unit, self.cv2_output_unit), fontsize=16)
pp.clabel(contour, inline=1, fontsize=10)
fig.set_size_inches(size)
else:
raise ValueError('Can only plot 1D or 2D data. Make sure that the slicer argument results in 1 or 2 dimensional data.')
fig.tight_layout()
if fn is not None:
pp.savefig(fn)
else:
pp.show()
return
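#Illustrative call (hypothetical `hist`, a 2D histogram instance): plot a
#1D slice of p(CV1,CV2) along CV1 with CV2 fixed at its 10th bin value,
#showing the stored value and a random sample from the error distribution:
#
#   hist.plot(fn='slice.png', slicer=[slice(None),10], obss=['value','sample'])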
[docs]
def plot_histograms(histograms, fn=None, temp=None, labels=None, flims=None, colors=None, linestyles=None, linewidths=None):
'''
Make a plot to compare multiple 1D probability histograms (and possibly the corresponding free energy profiles)
:param histograms: list of histograms to plot
:type histograms: list of :py:class:`Histogram1D <thermolib.thermodynamics.histogram.Histogram1D>` or child classes
:param fn: file name to save the figure to. If None, the plot will not be saved.
:type fn: str, optional, default=None
:param temp: if defined, the free energy profile corresponding to each histogram will be computed and plotted at its corresponding temperature. If a single float is given, all histograms are assumed to be at the same temperature; if a list or array of floats is given, each entry is taken as the temperature of the corresponding entry in the input list of histograms.
:type temp: float/list(float)/np.ndarray, optional, default=None
:param labels: list of labels for the legend, one for each histogram. Order is assumed to be consistent with histograms.
:type labels: list(str), optional, default=None
:param flims: [lower,upper] limit of the free energy axis in plots. Ignored if temp argument is not given.
:type flims: list of floats, optional, default=None
:param colors: List of matplotlib color definitions for each entry in histograms. If an entry is None, a color will be chosen internally. If colors=None, all colors are chosen internally.
:type colors: List(str), optional, default=None
:param linestyles: List of matplotlib line style definitions for each entry in histograms. If an entry is None, the default line style of '-' will be chosen internally. If linestyles=None, all line styles are set to '-'.
:type linestyles: List(str), optional, default=None
:param linewidths: List of matplotlib line width definitions for each entry in histograms. If an entry is None, the default line width of 2 will be used. If linewidths=None, all line widths are set to 2.
:type linewidths: List(float), optional, default=None
'''
#initialize
linewidth_default = 2
linestyle_default = '-'
nhist = len(histograms)
pp.clf()
if temp is None:
fig, axs = pp.subplots(nrows=1, ncols=1, squeeze=False)
else:
fig, axs = pp.subplots(nrows=1, ncols=2, squeeze=False)
cmap = cm.get_cmap('tab10')
cv_unit = histograms[0].cv_output_unit
cv_label = histograms[0].cv_label
fmax = -np.inf
#add probability histogram and possible free energy plots
for ihist, hist in enumerate(histograms):
#set label
label = 'Histogram %i' %ihist
if labels is not None:
label = labels[ihist]
#set color, linestyles, ...
color = None
if colors is not None:
color = colors[ihist]
if color is None:
color = cmap(ihist)
linewidth = None
if linewidths is not None:
linewidth = linewidths[ihist]
if linewidth is None:
linewidth = linewidth_default
linestyle = None
if linestyles is not None:
linestyle = linestyles[ihist]
if linestyle is None:
linestyle = linestyle_default
#plot probability
axs[0,0].plot(hist.cvs/parse_unit(cv_unit), hist.ps/max(hist.ps), linewidth=linewidth, color=color, linestyle=linestyle, label=label)
if hist.error is not None:
axs[0,0].fill_between(hist.cvs/parse_unit(cv_unit), hist.plower()/max(hist.ps), hist.pupper()/max(hist.ps), color=color, alpha=0.33)
#free energy if requested
if temp is not None:
fep = BaseFreeEnergyProfile.from_histogram(hist, temp)
funit = fep.f_output_unit
fmax = max(fmax, np.ceil(np.nanmax(fep.fs)/parse_unit(funit)/10)*10) #round the axis limit up to the next multiple of 10 in output units
axs[0,1].plot(fep.cvs/parse_unit(cv_unit), fep.fs/parse_unit(funit), linewidth=linewidth, color=color, linestyle=linestyle, label=label)
if fep.error is not None:
axs[0,1].fill_between(fep.cvs/parse_unit(cv_unit), fep.flower()/parse_unit(funit), fep.fupper()/parse_unit(funit), color=color, alpha=0.33)
#decorate
cv_range = [min(histograms[0].cvs/parse_unit(cv_unit)), max(histograms[0].cvs/parse_unit(cv_unit))]
axs[0,0].set_xlabel('%s [%s]' %(cv_label, cv_unit), fontsize=14)
axs[0,0].set_ylabel('Relative probability [-]', fontsize=14)
axs[0,0].set_title('Probability histogram', fontsize=14)
axs[0,0].set_xlim(cv_range)
axs[0,0].legend(loc='best')
if temp is not None:
axs[0,1].set_xlabel('%s [%s]' %(cv_label, cv_unit), fontsize=14)
axs[0,1].set_ylabel('F [%s]' %funit, fontsize=14)
axs[0,1].set_title('Free energy profile', fontsize=14)
axs[0,1].set_xlim(cv_range)
if flims is None:
axs[0,1].set_ylim([-1,fmax])
else:
axs[0,1].set_ylim(flims)
axs[0,1].legend(loc='best')
#save
if temp is not None:
fig.set_size_inches([16,8])
else:
fig.set_size_inches([8,8])
if fn is not None:
pp.savefig(fn)
else:
pp.show()
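#Illustrative call (hypothetical histograms `hist_a` and `hist_b`;
#temperature assumed to be given in kelvin): compare two histograms and
#their free energy profiles at 300 K:
#
#   plot_histograms([hist_a, hist_b], fn='compare.png', temp=300,
#                   labels=['run A', 'run B'])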