Source code for quickff.tools

# -*- coding: utf-8 -*-
# QuickFF is a code to quickly derive accurate force fields from ab initio input.
# Copyright (C) 2012 - 2018 Louis Vanduyfhuys <Louis.Vanduyfhuys@UGent.be>
# Steven Vandenbrande <Steven.Vandenbrande@UGent.be>,
# Jelle Wieme <Jelle.Wieme@UGent.be>,
# Toon Verstraelen <Toon.Verstraelen@UGent.be>, Center for Molecular Modeling
# (CMM), Ghent University, Ghent, Belgium; all rights reserved unless otherwise
# stated.
#
# This file is part of QuickFF.
#
# QuickFF is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# QuickFF is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
#
#--

from __future__ import print_function, absolute_import

from molmod.units import deg, angstrom, centimeter
from molmod.constants import lightspeed
from molmod.periodic import periodic as pt

from yaff import Chebychev1, Chebychev2, Chebychev3, Chebychev4, Chebychev6

from quickff.log import log

import numpy as np, math

__all__ = [
    'global_translation', 'global_rotation', 'fitpar',
    'boxqp', 'set_ffatypes', 'term_sort_atypes', 'get_multiplicity',
    'get_restvalue', 'get_ei_radii', 'digits', 'average', 'chebychev',
    'project_negative_freqs'
]


[docs]def global_translation(coords): ''' A function to generate vectors that represent global translations of a system. **Arguments** coords a (N,3) numpy array describing the system that has to be translated ''' Natoms = len(coords) ones = np.ones(Natoms, float) zeros = np.zeros(Natoms, float) VTx = np.concatenate(np.array([ones, zeros, zeros]).transpose())/np.sqrt(Natoms) VTy = np.concatenate(np.array([zeros, ones, zeros]).transpose())/np.sqrt(Natoms) VTz = np.concatenate(np.array([zeros, zeros, ones]).transpose())/np.sqrt(Natoms) return VTx, VTy, VTz
[docs]def global_rotation(coords): ''' A function to generate vectors that represent global translations of a system. Rx is a matrix of rotatino around the x-axis minus the identity matrix. VRx is a vector of rotation around x-axis. **Arguments** coords a (N,3) numpy array describing the system that has to be translated ''' Natoms = len(coords) com = coords.sum(axis=0)/coords.shape[0] Rz = np.array([ [ 0.0,-1.0, 0.0], [ 1.0, 0.0, 0.0], [ 0.0, 0.0, 0.0] ]) Ry = np.array([ [ 0.0, 0.0, 1.0], [ 0.0, 0.0, 0.0], [-1.0, 0.0, 0.0] ]) Rx = np.array([ [ 0.0, 0.0, 0.0], [ 0.0, 0.0, 1.0], [ 0.0,-1.0, 0.0] ]) VRx = np.dot(coords-com, Rx.transpose()).reshape([3*Natoms]) VRy = np.dot(coords-com, Ry.transpose()).reshape([3*Natoms]) VRz = np.dot(coords-com, Rz.transpose()).reshape([3*Natoms]) U = np.linalg.svd( np.array([VRx, VRy, VRz]).transpose() )[0] VRx = U.transpose()[0] VRy = U.transpose()[1] VRz = U.transpose()[2] return VRx, VRy, VRz
[docs]def fitpar(xs, ys, rcond=-1): ''' Fit a parabola to the samples (xs, ys): ys[:] = a*xs[:]^2 + b*xs[:] + c Returns the parabola parameters a, b and c. **Arguments** xs a (N) numpy array containing the x values of the samples ys a (N) numpy array containing the x values of the samples ''' assert len(xs)==len(ys) D = np.ones([len(xs), 3], float) for i, x in enumerate(xs): D[i, 0] = 0.1*x**2 D[i, 1] = x sol, res, rank, svals = np.linalg.lstsq(D, ys, rcond=rcond) sol[0] *= 0.1 return sol
[docs]def boxqp(A, B, bndl, bndu, x0, threshold=1e-9, status=False): ''' Minimize the function 1/2*xT.A.x - B.x subject to bndl < x < bndu (element-wise) This minimization is performed using a projected gradient method with step lengths computed using the Barzilai-Borwein method. See 10.1007/s00211-004-0569-y for a description. **Arguments** A (n x n) NumPy array appearing in cost function B (n) NumPy array appearing in cost function bndl (n) NumPy array giving lower boundaries for the variables bndu (n) NumPy array giving upper boundaries for the variables x0 (n) NumPy array providing an initial guess **Optional Arguments** threshold Criterion to consider the iterations converged status Return also the number of iterations performed ''' # Check that boundaries make sense assert np.all(bndl<bndu), "Some lower boundaries are higher than upper boundaries" # Check that matrix A is positive definite def project(x): '''Project x on to the box of constraints''' x[x<bndl] = bndl[x<bndl] x[x>bndu] = bndu[x>bndu] return x def gradient(x): return np.dot(A,x) - B def stopping(x): q = gradient(x) mask = x==bndl q[mask] = np.amin(np.asarray([q[mask],[0.0]*np.sum(mask)]), axis=0) mask = x==bndu q[mask] = np.amax(np.asarray([q[mask],[0.0]*np.sum(mask)]), axis=0) return np.linalg.norm(q) # Bootstrapping alpha alpha = 0.1 g0 = gradient(x0) x1 = project(x0-alpha*g0) gstop = np.linalg.norm(gradient(x1)) converged = False nit = 0 while converged is False: nit += 1 # New gradient g1 = gradient(x1) # Compute new step length s = x1 - x0 y = g1 - g0 alpha = np.dot(s,s)/np.dot(s,y) # Update old values x0 = x1 g0 = g1 # Compute new values x1 = project(x1-alpha*g1) if stopping(x1)/gstop < threshold: converged = True if status: return x1, nit else: return x1
[docs]def set_ffatypes(system, level): ''' A method to guess atom types. This will overwrite ffatypes that are already defined in the system. **Arguments:** system A yaff system instance level If level is a string containing comma's, it is assumed to be an ordered list containing the atom type of each atom in the system. Otherwise, level is assumed to be a string defining how to guess the atom types from the local topology. Possible levels are: * low: based on atomic number * medium: based on atomic number and number of neighbors * high: based on atomic number, number of neighbors and atomic number of neighbors * highest: based on index in the molecule ''' if system.ffatypes is not None: raise ValueError('Atom types are already defined in the system.') if ',' in level: atypes = level.split(',') elif level == 'low': atypes = np.array([pt[number].symbol for number in system.numbers]) elif level == 'medium': atypes = [] for index, number in enumerate(system.numbers): nind = system.neighs1[index] sym = pt[system.numbers[index]].symbol.upper() atype = '%s%i' % (sym, len(nind)) atypes.append(atype) elif level == 'high': atypes = [] for index, number in enumerate(system.numbers): nsyms = sorted([ pt[system.numbers[neigh]].symbol.lower() for neigh in system.neighs1[index] ]) sym = pt[system.numbers[index]].symbol.upper() if len(nsyms)==1: atype = '%s1_%s' % (sym, nsyms[0]) elif len(nsyms)==2: atype = '%s2_%s%s' % (sym, nsyms[0], nsyms[1]) else: atype = '%s%i' % (sym, len(system.neighs1[index])) neighs = {} for nsym in nsyms: if nsym=='h': continue if nsym in list(neighs.keys()): neighs[nsym] += 1 else: neighs[nsym] = 1 for nsym, nnum in neighs.items(): atype += '_%s%i' %(nsym, nnum) atypes.append(atype) elif level == 'highest': atypes = np.array([ '%s%i' % (pt[n].symbol, i) for i, n in enumerate(system.numbers) ]) else: raise ValueError('Invalid level, recieved %s' % level) system.ffatype_ids = np.zeros(len(system.numbers), int) system.ffatypes = [] for i, atype in enumerate(atypes): if atype not in system.ffatypes: system.ffatypes.append(atype) system.ffatype_ids[i] = system.ffatypes.index(atype) system.ffatypes = np.array(system.ffatypes)
[docs]def term_sort_atypes(ffatypes, indexes, kind): ''' Routine to sort the atoms defined in indexes to give consistent term names. This routine returns the sorted atom indexes as well as the corresponding atom types. ''' atypes = [ffatypes[i] for i in indexes] if kind in ['bond', 'dist', 'bend', 'angle']: if atypes[-1]<atypes[0] \ or (atypes==atypes[::-1] and indexes[-1]<indexes[0]) : sorted_indexes = indexes[::-1] sorted_atypes = atypes[::-1] else: sorted_indexes = indexes sorted_atypes = atypes elif kind in ['dihed', 'dihedral', 'torsion']: if atypes[-1]<atypes[0] \ or (atypes[-1]==atypes[0] and atypes[-2]<atypes[1]) \ or (atypes==atypes[::-1] and indexes[-1]<indexes[0]): sorted_indexes = indexes[::-1] sorted_atypes = atypes[::-1] else: sorted_indexes = indexes sorted_atypes = atypes elif kind in ['opdist', 'oopdist']: pairs = sorted(zip(indexes[:3], atypes[:3]), key=lambda x: x[1]+str(x[0])) sorted_indexes = [index for index, atype in pairs] sorted_indexes.append(indexes[3]) sorted_atypes = [atype for index, atype in pairs] sorted_atypes.append(atypes[3]) return tuple(sorted_indexes), tuple(sorted_atypes)
[docs]def get_multiplicity(n1, n2): 'Routine to estimate m from local topology' if set([n1,n2])==set([4,4]): return 3 elif set([n1,n2])==set([3,4]): return 6 elif set([n1,n2])==set([2,4]): return 3 elif set([n1,n2])==set([3,3]): return 2 elif set([n1,n2])==set([2,3]): return 2 elif set([n1,n2])==set([2,2]): return 1 else: return np.nan
[docs]def get_restvalue(values, m, thresshold=20*deg, mode=1): ''' Get a rest value of 0.0, 360/(2*m) or None depending on the given equilbrium values. For mode=0, the rest value is: 0, if all 'values modulo per' are in the interval [0,thresshold] U [per-thresshold,per] per/2, if all 'values modulo per' are in the interval [per/2-thresshold,per/2+thresshold] None, in all other cases For mode=1, the rest value is determined as follows: first the values are folded in the interval [0,per/2] by first taking the module with per and then mirroring values in [per/2,per] on [0,per/2]. Next, the mean and std of the folded values are computed. If the std is larger then the thresshold, the values are considered to be too scattered and no rest value can be computed (None is returned). If the std is small enough, the rest value will be determined based on the mean. If the mean is close enough to 0, the rest value will be 0. If the mean is close enough to per/2, the rest value will be per/2. In all other cases no rest value will be computed and None is returned. ''' rv = None per = 360*deg/m if mode==0: for value in values: x = value % per if abs(x)<=thresshold or abs(per-x)<thresshold: if rv is not None and rv!=0.0: return None elif rv is None: rv = 0.0 elif abs(x-per/2.0)<thresshold: if rv is not None and rv!=per/2.0: return None elif rv is None: rv = per/2.0 else: return None return rv elif mode==1: folded = np.zeros(len(values), float) for i, value in enumerate(values): new = value % per if new>0.5*per: folded[i] = per - new else: folded[i] = new mean = folded.mean() std = folded.std() assert 0.0<=mean and mean<=0.5*per if std<thresshold: if mean<=0.25*per: rv=0.0 elif 0.5*per-mean<=0.25*per: rv=0.5*per return rv else: raise NotImplementedError('Mode %i in get_restvalue is not supported' %mode)
[docs]def get_ei_radii(numbers): ''' Routine to return atomic radii for use in the Gaussian charge distribution. These radii are computed according to the procedure of Chen and Slater: First the Slater exponent is computed from the hardness using the formula of Rappe and Goddard (hardness of Pearson and Parr is used) Next the gaussian exponent alpha is fitted by minimizing the L2-difference between the between the homonuclear Coulomb integral over Slater orbitals and over Gaussian orbitals. ''' radii = { 'H' : 0.7308*angstrom, 'Li': 1.2951*angstrom, 'B' : 1.2020*angstrom, 'C' : 1.1703*angstrom, 'N' : 1.1048*angstrom, 'O' : 1.1325*angstrom, 'F' : 1.1096*angstrom, 'Na': 1.7093*angstrom, 'Mg': 1.6155*angstrom, 'Al': 1.6742*angstrom, 'Si': 1.6376*angstrom, 'P' : 1.5727*angstrom, 'S' : 1.6011*angstrom, 'Cl': 1.5798*angstrom, 'Ca': 1.6541*angstrom, 'Sc': 2.0559*angstrom, 'Ti': 2.0502*angstrom, 'V' : 2.0654*angstrom, 'Cr': 2.0692*angstrom, 'Mn': 2.0323*angstrom, 'Fe': 2.0695*angstrom, 'Co': 2.0377*angstrom, 'Ni': 2.0579*angstrom, 'Cu': 2.0573*angstrom, 'Zn': 1.9896*angstrom, 'Ga': 2.0820*angstrom, 'Br': 2.0088*angstrom, } values = np.zeros(len(numbers), float) for i, number in enumerate(numbers): symbol = pt[number].symbol if not symbol in list(radii.keys()): raise ValueError('No electrostatic Gaussian radii found for %s' %symbol) values[i] = radii[symbol] return values
[docs]def digits(x,n): """ returns a string representation of x with exactly n digits if possible. """ if np.isnan(x): return '' if len(str(x))==n: return str(x) sign = np.sign(x) x = float(abs(x)) if x < 0.5*10**(-(n-1)): return "." + "0"*(n-1) if sign<0: n -= 1 i = int(x) r = x-int(x) if i==0: if sign<0: return '-'+str(r)[1:1+n] else: s = '%f' %r return s[1:1+n] if r==0: return str(int(i*sign))[:n] if len(str(i))>=(n-1): return str(int(i*sign)) ndig = n - len(str(i))-1 if sign<0: return '-%i.%s' %(i, str(r)[2:2+ndig]) else: return '%i.%s' %(i, str(r)[2:2+ndig])
[docs]def average(data, ffatypes, fmt='full', verbose=False): ''' Average the atomic parameters stored in data over atoms of the same atom type. **Arguments** data a list or numpy array containing the data ffatypes a listor numpy array containing the atom types. Should have equal length as data **Keywork arguments** fmt Should be either full, dict or sort. In case of full, the result will be returned as an numpy array of equal length as data and ffatypes. In case of dict, the result will be returned as a dictionairy of the following format: {ffatype0: value0, ffatype1: value1, ...} in which value0, ... is the mean value of the given ffatype. In case of sort, a dictionairy of the following format will be returned: {ffatype0: values0, ffatype1: values1, ...} in which values0, ... is a list of the values for the given ffatype. ''' data_atypes = {} for value, ffatype in zip(data,ffatypes): if ffatype in list(data_atypes.keys()): data_atypes[ffatype].append(value) else: data_atypes[ffatype] = [value] if fmt=='sort': output = {} for ffatype, data in data_atypes.items(): output[ffatype] = np.array(data) elif fmt=='full': output = np.zeros(len(data)) printed = [] for i, ffatype in enumerate(ffatypes): std = np.array(data_atypes[ffatype]).std() if not std < 1e-2 and ffatype not in printed: print('WARNING: charge of atom type %s has a large std: %.3e' %(ffatype, std)) printed.append(ffatype) output[i] = np.array(data_atypes[ffatype]).mean() elif fmt=='dict': output = {} for ffatype, values in data_atypes.items(): output[ffatype] = np.array(values).mean() else: raise IOError('Format %s not supported, should be full or dict' %fmt) if verbose: print('Averaged Atomic Charges:') print('------------------------') for ffatype, values in data_atypes.items(): print(' %4s % .3f +- % .3f (N=%i)' %(ffatype, np.array(values).mean(), np.array(values).std(), len(values))) print('') return output
def charges_to_bcis(charges, ffatypes, bonds, constraints={}, verbose=True): ''' Transform atomic charges to bond charge increments, by definition 2 bci's between different pairs of atoms but with identical pairs of atom types will be equal. Bci's will be returned as a dictionairy containing tuples of the format (ffatype0.ffatype1, bci_value) **Arguments** charges a (N,) list or numpy array containing the charges ffatypes a (N,) list or numpy array with the atom type of each atom in the system bonds a (B,2) list or numpy array for each bond in the system **Keyword Arguments** constraints a dictionairy of format (master, [(slave0,sign0), (slave1,sign1), ...]) verbose increase verbosity ''' assert len(charges)==len(ffatypes) #construct list of bond types and signs, the signs are related to the #direction of the bci of a certain bond. We want that bonds of type #A.B have a bci with equal magnitude as a bond of type B.A. Therefore, we #only store the bci of A.B (alphabetically) and also store a sign, which is #1.0 for bond A.B and -1 for bond B.A btypes = ['',]*len(bonds) signs = np.zeros([len(bonds)], float) for i, bond in enumerate(bonds): ffatype0, ffatype1 = ffatypes[bond[0]], ffatypes[bond[1]] if ffatype0<ffatype1: btypes[i] = '%s.%s' %(ffatype0,ffatype1) signs[i] = 1.0 else: btypes[i] = '%s.%s' %(ffatype1,ffatype0) signs[i] = -1.0 #decompile constraints masterof = {} for m, s in constraints.items(): types = m.split('.') if types[0]>=types[1]: m = '.'.join(types[::-1]) for slave, sign in s: types = slave.split('.') if types[0]>=types[1]: slave = '.'.join(types[::-1]) assert slave not in list(masterof.keys()), \ 'Slave %s has multiple masters in constraints' %slave masterof[slave] = (m, sign) masterlist = [] for btype in btypes: if btype in list(masterof.keys()) : continue if btype in masterlist: continue masterlist.append(btype) for master in masterlist: assert not master in list(masterof.keys()), 'master %s encountered in slaves' %master for slave in list(masterof.keys()): assert not slave in masterlist, 'slave %s encountered in masters' %slave if verbose: print('Master-slaves relations:') print('------------------------') for slave in list(masterof.keys()): print(slave, masterof[slave]) if len(list(masterof.keys()))==0: print('(None)') print('') #construct the matrix to convert bci's to charges #matrix[i,n] is the contribution to charge i from bci n #bci p_AB is a charge transfer from B to A, hence qA+=p_AB and qB-=p_AB matrix = np.zeros([len(charges), len(masterlist)], float) for i, (btype, bond, sign) in enumerate(zip(btypes, bonds, signs)): if btype in masterlist: index = masterlist.index(btype) sign_switch = 1.0 elif btype in list(masterof.keys()): master, sign_switch = masterof[btype] index = masterlist.index(master) else: raise ValueError('No master found for bond %s of type %s' %(bond, btype)) matrix[bond[0],index] += sign*sign_switch matrix[bond[1],index] += -sign*sign_switch #solve the set of equations q=M.t with q the full array of atomic charges #and t the array of bci masters bcis, res, rank, svals = np.linalg.lstsq(matrix, charges, rcond=1e-6) #print statistics if required if verbose: print('Fitting SQ to charges:') print('----------------------') print(' sing vals = ', svals) if min(svals)>0: print(' cond numb = ', max(svals)/min(svals)) else: print(' cond numb = inf') print('') print('Resulting split charges:') print('------------------------') for btype, sq in zip(masterlist, bcis): print(' %10s % .3f' %(btype, sq)) print('') print('Statistics of the BCI charges:') print('------------------------------') apriori_values = average(charges, ffatypes, 'sort') aposteriori_values = average(np.dot(matrix, bcis), ffatypes, 'sort') print(' %10s | %6s +- %5s (%2s) | %6s +- %5s (%2s) | %9s ' %('Atype', '<Qin>', 'std', 'N', '<Qbci>', 'std', 'N', 'RMSD')) print(' '+'-'*71) sums = np.array([0.0, 0.0, 0.0]) for atype, qins in apriori_values.items(): qouts = aposteriori_values[atype] print(' %10s | % 6.3f +- %5.3f (%2i) | % 6.3f +- %5.3f (%2i) | % 9.6f ' %(atype, qins.mean(), qins.std(), len(qins), qouts.mean(), qouts.std(), len(qouts), np.sqrt(((qouts-qins)**2).mean()) )) sums += np.array([qins.sum(), qouts.sum(), ((qouts-qins)**2).sum()]) print(' '+'-'*71) print(' %10s | % 9.6f | % 9.6f | % 9.6f ' %('ALL', sums[0], sums[1], np.sqrt(sums[2]/len(ffatypes)) )) print('') #construct output dictionnary containing also bci's of slaves result = dict((btype, bci) for btype, bci in zip(masterlist, bcis)) for slave, (master, sign) in masterof.items(): result[slave] = result[master]*sign return result def chebychev(m, x): if m==0: return 1 elif m==1: return x else: return 2.0*x*chebychev(m-1,x)-chebychev(m-2,x) def project_negative_freqs(hessian, masses, thresshold=0.0): N = len(masses) sqrt_mass_matrix = np.diag(np.sqrt((np.array([masses, masses, masses]).T).ravel())) isqrt_mass_matrix = np.linalg.inv(sqrt_mass_matrix) matrix = np.dot(isqrt_mass_matrix, np.dot(hessian.reshape([3*N,3*N]), isqrt_mass_matrix)) #diagonalize if ((matrix-matrix.T)<1e-6*lightspeed/centimeter).all(): evals, evecs = np.linalg.eigh(matrix) else: evals, evecs = np.linalg.eig(matrix) log.dump('20 lowest frequencies [1/cm] before projection:') log.dump(str(evals[:4]/(lightspeed/centimeter))) log.dump(str(evals[4:8]/(lightspeed/centimeter))) log.dump(str(evals[8:12]/(lightspeed/centimeter))) log.dump(str(evals[12:16]/(lightspeed/centimeter))) log.dump(str(evals[16:20]/(lightspeed/centimeter))) #set negative eigenvalues to zero evals[evals<thresshold] = 0.0 projected_matrix = np.dot(evecs, np.dot(np.diag(evals), evecs.T)) projected_hessian = np.dot(sqrt_mass_matrix, np.dot(projected_matrix, sqrt_mass_matrix)) #dump freqs after projection as check evals, evecs = np.linalg.eigh(projected_matrix) log.dump('20 lowest frequencies [1/cm] after projection:') log.dump(str(evals[:4]/(lightspeed/centimeter))) log.dump(str(evals[4:8]/(lightspeed/centimeter))) log.dump(str(evals[8:12]/(lightspeed/centimeter))) log.dump(str(evals[12:16]/(lightspeed/centimeter))) log.dump(str(evals[16:20]/(lightspeed/centimeter))) return projected_hessian.reshape([N, 3, N, 3])