16 from math
import sqrt,exp
17 from StringIO
import StringIO
19 from random
import random
21 kB= (1.381 * 6.02214) / 4184.0
24 def __init__(self, cont, md, n_md_steps):
27 self.n_md_steps = n_md_steps
34 for i
in xrange(self.cont.get_number_of_particles()):
35 p=self.cont.get_particle(i)
38 self.oldcoords.append(d.get_coordinates())
40 def propose_move(self, prob):
41 self.md.optimize(self.n_md_steps)
44 for i
in xrange(self.cont.get_number_of_particles()):
45 p=self.cont.get_particle(i)
49 d.set_coordinates(self.oldcoords[i])
51 def get_number_of_steps(self):
52 return self.n_md_steps
54 def set_number_of_steps(self, nsteps):
55 self.n_md_steps = nsteps
60 def __init__(self,model):
63 def add_mover(self,mv):
69 def set_return_best(self,thing):
71 def set_move_probability(self,thing):
75 pot=self.m.evaluate(
False)
77 kin=self.mv.md.get_kinetic_energy()
80 def metropolis(self, old, new):
83 print ": old %f new %f deltaE %f new_epot: %f" % (old,new,deltaE,
84 self.m.evaluate(
False)),
89 return exp(-deltaE/kT) > random()
91 def optimize(self,nsteps):
93 print "=== new MC call"
96 for i
in xrange(nsteps):
97 print "MC step %d " % i,
99 self.mv.md.assign_velocities(self.kT / kB)
101 old=self.get_energy()
103 self.mv.propose_move(1)
105 new=self.get_energy()
106 if self.metropolis(old,new):
116 def get_number_of_forward_steps(self):
120 """nonspecific methods used across all shared function objects.
122 - Their name starts with the name of the parent function (e.g.
124 - they don't store anything in the class, but instead
125 return all created objects.
126 Exceptions: the model, which is self._m
127 the statistics class, which is self.stat
128 - they store what they have to store in the model (e.g. restraints)
129 - they don't print out anything except for long routines (e.g. NOE
131 - the prior RestraintSet is added to the model when it is created, so
132 that when it is passed to another function, it is not added twice.
138 def set_checklevel(self,value):
141 def set_loglevel(self,value):
144 def m(self,name,*args,**kw):
145 "wrapper to call methods of m"
146 func=getattr(self._m,name)
147 return func(*args, **kw)
150 "moves to wd and creates model"
156 ff_temp=300.0, disulfides=
None, representation=
'custom'):
157 """creates a CHARMM protein representation.
158 creates the charmm force field, bonded and nonbonded.
159 - initpdb: initial structure in pdb format
160 - top is a CHARMM top.lib, read if representation=='custom' (default)
161 - par is a CHARMM par.lib
162 - selector is an instance of
163 one of the selectors of IMP.atom, for example
164 IMP.atom.NonWaterNonHydrogenPDBSelector().
165 - pairscore is an instance of a Pair Score to score the interaction
166 between two atoms. usually, it's either
167 LennardJonesDistancePairScore(0,1) or
168 RepulsiveDistancePairScore(0,1)
169 - ff_temp is the temperature at which the force field should be
171 - disulfides: if not None, a list of tuples corresponding to residue
172 numbers that should be cross-linked. Residues should be
173 cysteines, and residue numbering should start at 0.
174 - representation: 'full' : all-atom CHARMM force field
175 'heavy': heavy atom CHARMM forcefield with polar H
176 'calpha': only C alphas, ball and stick model with
177 bondlength 3.78 angstrom, beads at VdW
178 contact, and harmonic restraint between
180 'custom' (default) : read given CHARMM top and par
182 Returns: prot, ff, rsb, rs
184 - ff: the force field
185 - rsb: the RestraintSet on bonded interactions
186 - rs: the RestraintSet on nonbonded interactions. Both are weighted
191 if not prot.get_is_valid(
True):
192 raise ValueError,
"invalid hierarchy!"
193 if representation ==
'custom':
196 elif representation ==
'heavy':
198 elif representation ==
'full':
200 elif representation ==
'calpha':
203 raise NotImplementedError, representation
204 if representation ==
'calpha':
205 print "setting up simplified C alpha force field"
212 for chain
in prot.get_children():
213 residues=[(chain.get_child(i),chain.get_child(i+1))
214 for i
in xrange(chain.get_number_of_children()-1)]
215 residues=[(i.get_child(0).get_particle(),
216 j.get_child(0).get_particle())
217 for (i,j)
in residues]
218 pairs.extend(residues)
237 rsb.add_restraint(br)
238 rsb.set_weight(1.0/(kB*ff_temp))
241 nonbonded_pair_filter.set_bonds(bonds)
244 print "setting up CHARMM forcefield"
248 topology = ff.create_topology(prot)
252 topology.apply_default_patches()
255 s=topology.get_segment(0)
256 dis=ff.get_patch(
'DISU')
257 for (i,j)
in disulfides:
263 r0.set_patched(
False)
265 r1.set_patched(
False)
267 print "added disulfide bridge between cysteines %d and %d" % (i,j)
273 topology.setup_hierarchy(prot)
286 ff.add_well_depths(prot)
287 nonbonded_pair_filter = r.get_pair_filter()
299 nbl.add_pair_filter(nonbonded_pair_filter)
304 rs.set_weight(1.0/(kB*ff_temp))
306 return prot, ff, rsb, rs
309 """sets up a Scale particle to the initial default value. It can
310 optionnally be constrained between two positive bounds, or else its
311 range is 0 to infinity.
316 if lower != -1
or upper != -1:
318 IMP.isd.ScaleRangeModifier(),
None,scale))
322 """given a list of scales, returns a RestraintSet('prior') with weight
323 1.0 that contains a list of vonMisesKappaJeffreysRestraint on each scale.
324 If argument prior_rs is used, add them to that RestraintSet instead.
328 self._m.add_restraint(prior_rs)
329 prior_rs.set_weight(1.0)
335 """given a list of scales, returns a RestraintSet('prior') with weight
336 1.0 that contains a list of JeffreysRestraint on each scale.
337 If argument prior_rs is used, add them to that RestraintSet instead.
341 self._m.add_restraint(prior_rs)
342 prior_rs.set_weight(1.0)
348 """given a list of scales, returns a RestraintSet('prior') with weight
349 1.0 that contains a list of vonMisesKappaConjugateRestraint on each
350 scale. If argument prior_rs is used, add them to that RestraintSet
353 if not (0 <= R <= c):
354 raise ValueError,
"parameters R and c should satisfy 0 <= R <= c"
357 self._m.add_restraint(prior_rs)
358 prior_rs.set_weight(1.0)
364 """scans the prot hierarchy and tries to find atom = (resno, name)
365 assumes that resno follows the same numbering as the sequence.
366 Stores already found atoms for increased speed.
368 if not hasattr(self,
'__memoized'):
369 self.__memoized={prot:{}}
371 return self.__memoized[prot][atom]
376 residue_index=atom[0],
378 ).get_selected_particles()
380 print "found multiple atoms for atom %d %s!" % atom
384 print "atom %d %s not found" % atom
386 self.__memoized[prot][atom] = p0
391 Sets up a vonMises torsion angle restraint using the given kappa
392 particle as concentration parameter. Returns the restraint.
393 data is a list of observations.
399 Sets up a vonMises torsion angle restraint using the given kappa
400 particle as concentration parameter. Returns the restraint.
401 data is (mean, standard deviation).
403 raise NotImplementedError
407 Sets up a lognormal distance restraint using the given sigma and gamma.
408 Returns the restraint.
409 assumes atoms = (atom1, atom2)
410 where atom1 is (resno, atomname) and resno is the residue sequence
422 """Reads an ambiguous NOE restraint. contributions is a list of
423 (atom1, atom2) pairs, where atom1 is (resno, atomname). Sets up a
424 lognormal distance restraint using the given sigma and gamma.
425 Returns the restraint.
428 pairs=[(self.
find_atom(i, prot).get_particle(),
429 self.
find_atom(j, prot).get_particle())
for (i,j)
in
436 def init_model_NOEs(self, prot, seqfile, tblfile, name='NOE', prior_rs=None,
437 bounds_sigma=(1.0,0.1,100), bounds_gamma=(1.0,0.1,100),
438 verbose=
True, sequence_match=(1,1)):
439 """read TBL file and store NOE restraints, using one sigma and one gamma
440 for the whole dataset. Creates the necessary uninformative priors.
441 - prot: protein hierarchy
442 - seqfile: a file with 3-letter sequence
443 - tblfile: a TBL file with the restraints
444 - name: an optional name for the restraintset
445 - prior_rs: when not None, add new sigma and gamma to this
446 RestraintSet instance.
447 - bounds_sigma or gamma: tuple of (initial value, lower, upper bound)
448 bounds can be -1 to set to default range [0,+inf]
449 - verbose: be verbose (default True)
450 - sequence_match : (noe_start, sequence_start)
451 Returns: data_rs, prior_rs, sigma, gamma
455 print "Prior for the NOE Scales"
463 first_residue_number=sequence_match[1])
464 tblr = IMP.isd.TBLReader.TBLReader(sequence,
465 sequence_match=sequence_match)
466 restraints = tblr.read_distances(tblfile,
'NOE')[
'NOE']
467 for i,restraint
in enumerate(restraints):
468 if verbose
and i % 100 == 0:
474 if len(restraint[0]) > 1:
476 restraint[1], sigma, gamma)
479 restraint[1], sigma, gamma)
482 print "\r%d NOE restraints read" % i
486 self._m.add_restraint(rs)
487 return rs, prior_rs, sigma, gamma
490 verbose=
True, sequence_match=(1,1)):
491 """read TBL file and store NOE restraints, using the marginal of the
492 lognormal with one sigma and one gamma, for the whole dataset.
493 - prot: protein hierarchy
494 - seqfile: a file with 3-letter sequence
495 - tblfile: a TBL file with the restraints
496 - name: an optional name for the restraintset
497 - verbose: be verbose (default True)
498 - sequence_match : (noe_start, sequence_start)
505 first_residue_number=sequence_match[1])
506 tblr = IMP.isd.TBLReader.TBLReader(sequence,
507 sequence_match=sequence_match)
508 restraints = tblr.read_distances(tblfile,
'NOE')[
'NOE']
510 for i,restraint
in enumerate(restraints):
511 if verbose
and i % 100 == 0:
518 pairs=[(self.
find_atom(i, prot).get_particle(),
519 self.
find_atom(j, prot).get_particle())
for (i,j)
in
522 ln.add_contribution(pairs, restraint[4])
525 print "\r%d NOE contributions added" % (len(restraints))
527 self._m.add_restraint(rs)
532 """read TBL file and store lognormal restraints, using the marginal of the
533 lognormal with one sigma and gamma=1, for the whole dataset.
534 - prot: protein hierarchy
535 - seqfile: a file with 3-letter sequence
536 - tblfile: a TBL file with the restraints
537 - name: an optional name for the restraintset
538 - verbose: be verbose (default True)
545 tblr = IMP.isd.TBLReader.TBLReader(sequence)
546 restraints = tblr.read_distances(tblfile,
'HBond')[
'HBond']
548 for i,restraint
in enumerate(restraints):
549 if verbose
and i % 100 == 0:
556 pairs=[(self.
find_atom(i, prot).get_particle(),
557 self.
find_atom(j, prot).get_particle())
for (i,j)
in
560 ln.add_contribution(pairs, restraint[4])
563 print "\r%d Hbond contributions added" % (len(restraints))
565 self._m.add_restraint(rs)
569 sequence_match=(1,1),name=
'TALOS', prior_rs=
None,
570 bounds_kappa=(1.0, 0.1,10), verbose=
True, prior=
'jeffreys',
572 """read TALOS dihedral angle data, and create restraints for phi/psi
573 torsion angles, along with the prior for kappa, which is a scale for the
574 whole dataset, compare to 1/sigma**2 in the NOE case.
575 - prot: protein hierarchy
576 - seqfile: a file with 3-letter sequence
577 - talos_data: either a file (pred.tab), or a folder (pred/) in which all
578 files in the form res???.tab can be found. If possible,
579 try to provide the folder, as statistics are more
580 accurate in that case.
581 - fulldata : either True or False, whether the data is the full TALOS
582 output (predAll.tab or pred/ folder), or just the averages
584 - sequence_match : (talos_no, sequence_no) to adjust for different
586 - name: an optional name for the restraintset
587 - prior_rs: when not None, add new kappa(s) to this RestraintSet instance.
588 - bounds_kappa: tuple of (initial value, lower, upper bound)
589 bounds can be -1 to set to default range [0,+inf]
590 - verbose: be verbose (default True)
591 - prior: either 'jeffreys' or a tuple (R,c), which signifies to use the
592 conjugate prior of the von Mises restraint, with parameters R
593 and c. Good values are R=0 and c=10. Default: jeffreys prior.
594 - keep_all: in case of a folder for 'talos_data', whether to keep
595 candidates marked as 'outliers' by TALOS, or to include them.
596 Returns: data_rs, prior_rs, kappa
601 print "Prior for von Mises Kappa"
609 first_residue_no=sequence_match[1])
612 sequence_match=sequence_match)
613 if os.path.isdir(talos_data):
615 for i,res
in enumerate(glob(os.path.join(talos_data,
'res???.tab'))):
616 if verbose
and i % 100:
622 talosr.read(talos_data)
626 sequence_match=sequence_match)
627 talosr.read(talos_data)
629 data = talosr.get_data()
631 print "\rread dihedral data for %d residues" % len(data)
632 print "creating restraints"
634 for resno,datum
in data.iteritems():
645 avgR.append(r.get_R0())
649 avgR.append(r.get_R0())
654 avgR.append(r.get_R0())
658 avgR.append(r.get_R0())
660 print "%s Restraints created. Average R0: %f" % \
661 (len(avgR), sum(avgR)/float(len(avgR)))
663 self._m.add_restraint(rs)
664 return rs, prior_rs, kappa
667 ff_type=IMP.saxs.HEAVY_ATOMS):
668 """read experimental SAXS profile and apply restraint the standard
670 Returns: a restraintset and the experimental profile
676 rs.add_restraint(saxs_restraint)
678 self._m.add_restraint(rs)
679 return rs, saxs_profile
681 def _setup_md(self,prot, temperature=300.0, thermostat='berendsen',
682 coupling=500, md_restraints=
None, timestep=1.0, recenter=1000,
684 """setup molecular dynamics
685 - temperature: target temperature
686 - thermostat: one of 'NVE', rescale_velocities',
687 'berendsen', 'langevin'
688 - coupling: coupling constant (tau (fs) for berendsen,
689 gamma (/fs) for langevin)
690 - md_restraints: if not None, specify the terms of the energy to be used
691 during the md steps via a list of restraints.
692 - timestep: in femtoseconds.
693 - recenter: recenter the molecule every so many steps (Langevin only)
694 - momentum: remove angular momentum every so many steps (Berendsen only)
695 Returns: an instance of md and an instance of an OptimizerState (the
696 thermostat), or None if NVE.
700 md.set_model(self._m)
701 md.assign_velocities(temperature)
702 md.set_time_step(timestep)
703 if thermostat ==
'NVE':
705 elif thermostat ==
'rescale_velocities':
708 md.add_optimizer_state(os)
709 elif thermostat ==
'berendsen':
712 md.add_optimizer_state(os)
715 md.add_optimizer_state(mom)
716 elif thermostat ==
'langevin':
719 md.add_optimizer_state(os)
724 raise NotImplementedError, thermostat
727 md.set_restraints(md_restraints)
731 def _setup_normal_mover(self, particle, floatkey, stepsize):
732 """setup NormalMover to move particle's floatkey attribute
733 by a gaussian with standard deviation 'stepsize'
734 Returns: mover instance.
737 cont.add_particle(particle)
742 def _setup_md_mover(self, md, particles, temperature, n_md_steps=10):
743 """setup MDMover using md and particles.
745 - particles: particles to move, usually the leaves of the protein
747 - n_md_steps: number of md steps to perform on each move
748 Returns: mover instance.
751 cont.add_particles(particles)
752 return IMP.atom.MDMover(cont, md, temperature, n_md_steps)
754 def _setup_mc(self, mover, temperature=300.0, mc_restraints=None):
755 """setup monte carlo using a certain mover.
756 - mover: mover to use, NormalMover or MDMover usually.
757 - temperature: target temperature.
758 - mc_restraints: if not None, list of restraints for the metropolis
760 Returns: mc instance.
766 mc.set_kt(kB*temperature)
768 mc.set_return_best(
False)
770 mc.set_move_probability(1.0)
773 mc.set_restraints(mc_restraints)
777 gamma=0.01, n_md_steps=10,
778 md_restraints=
None, mc_restraints=
None, timestep=1.0,
779 sd_threshold=0.0, sd_stepsize=0.01, sd_maxsteps=100):
780 """setup hybrid monte-carlo on protein. Uses basin hopping with steepest
781 descent minimization.
782 - prot: protein hierarchy.
783 - temperature: target temperature.
784 - gamma: coupling constant for langevin (/fs)
785 - n_md_steps: number of md steps per mc step
786 - md_restraints: if not None, specify the terms of the energy to be used
788 - mc_restraints: if not None, use these energy terms for the metropolis
790 - timestep: time step for md, in femtoseconds.
791 - sd_threshold: stop steepest descent after energy difference drops
793 - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
794 - sd_maxsteps: maximum number of steps for steepest descent
795 Returns: hmc, mdmover, md and OptimizerState (thermostat)
797 raise NotImplementedError
798 md, os = self._setup_md(prot, temperature=temperature,
799 thermostat=
'langevin', coupling=gamma,
800 md_restraints=md_restraints, timestep=timestep)
802 mdmover = self._setup_md_mover(md, particles, temperature, n_md_steps)
803 hmc = self._setup_mc(mdmover, temperature, mc_restraints)
805 sd.set_threshold(sd_threshold)
806 sd.set_step_size(sd_stepsize)
807 hmc.set_local_optimizer(sd)
808 hmc.set_local_steps(sd_maxsteps)
809 hmc.set_use_basin_hopping(
True)
810 return hmc, mdmover, md, os
813 n_md_steps=100, md_restraints=
None, mc_restraints=
None,
815 """setup hybrid monte-carlo on protein. Uses NVE MD and tries the full
816 - prot: protein hierarchy.
817 - temperature: target temperature.
818 - coupling: coupling constant (tau (fs) for berendsen,
819 gamma (/fs) for langevin)
820 - n_md_steps: number of md steps per mc step
821 - md_restraints: if not None, specify the terms of the energy to be used
823 - mc_restraints: if not None, use these energy terms for the metropolis
825 - timestep: time step for md, in femtoseconds.
826 - sd_threshold: stop steepest descent after energy difference drops
828 - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
829 - sd_maxsteps: maximum number of steps for steepest descent
830 Returns: hmc, mdmover and md
832 prot = self._p[
'prot']
833 md, os = self._setup_md(prot, temperature=temperature,
834 thermostat=
'NVE', coupling=
None, timestep=1.0,
835 md_restraints=md_restraints)
838 cont.add_particles(particles)
839 mdmover = PyMDMover(cont, md, n_md_steps)
845 mc.add_mover(mdmover)
847 mc.set_kt(kB*temperature)
849 mc.set_return_best(
False)
851 mc.set_move_probability(1.0)
852 return mc, mdmover, md
855 mc_restraints=
None, floatkey=
IMP.FloatKey(
"scale"), nm_stepsize=0.1):
856 """sets up monte carlo on scale, at a certain target temperature,
857 optionnally using a certain set of restraints only.
858 - scale: scale particle
859 - temperature: target temperature
860 - mc_restraints: optional set of restraints from which the energy should
861 be drawn instead of the energy of the complete system.
862 - floatkey: the floatkey to move.
863 - nm_stepsize: the stepsize of the normal mover
864 Returns: mc instance, nm instance.
866 nm = self._setup_normal_mover(scale, floatkey, nm_stepsize)
867 mc = self._setup_mc(nm, temperature, mc_restraints)
870 def _mc_and_update_nm(self, nsteps, mc, nm, stats_key,
871 adjust_stepsize=
True):
872 """run mc using a normal mover on a single particle,
873 update stepsize and store nsteps, acceptance and stepsize in the
874 statistics instance self.stat by using the key stats_key.
878 naccept = mc.get_number_of_forward_steps()
880 self.stat.increment_counter(stats_key, nsteps)
882 accept = float(naccept)/nsteps
883 self.stat.update(stats_key,
'acceptance', accept)
884 stepsize = nm.get_sigma()
885 self.stat.update(stats_key,
'stepsize', stepsize)
888 if 0.4 < accept < 0.6:
894 nm.set_sigma(stepsize*2*accept)
896 def _hmc_and_update_md(self, nsteps, hmc, mv, stats_key,
897 adjust_stepsize=
True):
898 """run hmc, update stepsize and print statistics. Updates number of MD
899 steps to reach a target acceptance between 0.4 and 0.6, sends
900 statistics to self.stat. MD steps are always at least 10 and at most 500.
903 naccept = hmc.get_number_of_forward_steps()
904 self.stat.increment_counter(stats_key, nsteps)
905 accept = float(naccept)/nsteps
906 self.stat.update(stats_key,
'acceptance', accept)
907 mdsteps = mv.get_number_of_steps()
908 self.stat.update(stats_key,
'n_md_steps', mdsteps)
910 if 0.4 < accept < 0.6:
912 mdsteps = int(mdsteps *2**(accept-0.5))
917 mv.set_nsteps(mdsteps)
920 """returns a string corresponding to the pdb structure of hierarchy
925 return output.getvalue()
927 def get_netcdf(self, prot):
928 raise NotImplementedError
931 temperature=300.0, prot_coordinates=
None):
932 """updates statistics for md simulation: target temp, kinetic energy,
933 kinetic temperature, writes coordinates and increments counter.
934 - md_key: stats md key
935 - nsteps: number of steps performed.
936 - md_instance: instance of the MolecularDynamics class.
937 - temperature: target temperature
938 - prot_coordinates: protein coordinates to be passed to the stats class,
941 self.stat.update(md_key,
'target_temp', temperature)
942 kinetic = md_instance.get_kinetic_energy()
943 self.stat.update(md_key,
'E_kinetic', kinetic)
944 self.stat.update(md_key,
'instant_temp',
945 md_instance.get_kinetic_temperature(kinetic))
946 self.stat.update_coordinates(md_key,
'protein', prot_coordinates)
947 self.stat.increment_counter(md_key, nsteps)
950 """shortcut for a frequent series of operations on MC simulations'
951 statistics. Creates an entry for acceptance, stepsize and one
952 coordinate set printed in the statistics file.
955 mc_key = stat.add_category(name=name)
957 stat.add_entry(mc_key, entry=Entry(
'temperature',
'%10G',
None))
958 stat.add_entry(mc_key, entry=Entry(
'acceptance',
'%10G',
None))
959 stat.add_entry(mc_key, entry=Entry(
'stepsize',
'%10G',
None))
961 stat.add_entry(mc_key, entry=Entry(coord,
'%10G',
None))
963 stat.add_entry(mc_key, name=
'counter')
967 """shortcut for a frequent series of operations on MD simulations'
968 statistics. Creates an entry for target temp, instantaneous temp,
969 kinetic energy, and one set of coordinates called 'protein' by
973 md_key = stat.add_category(name=name)
975 stat.add_entry(md_key, entry=Entry(
'target_temp',
'%10G',
None))
976 stat.add_entry(md_key, entry=Entry(
'instant_temp',
'%10G',
None))
977 stat.add_entry(md_key, entry=Entry(
'E_kinetic',
'%10G',
None))
979 stat.add_coordinates(md_key, coord)
981 stat.add_entry(md_key, name=
'counter')
985 """shortcut for a frequent series of operations on HMC simulations'
986 statistics. Adds acceptance, number of MD steps and a trajectory for
990 hmc_key = stat.add_category(name=name)
992 stat.add_entry(hmc_key, entry=Entry(
'temperature',
'%10G',
None))
993 stat.add_entry(hmc_key, entry=Entry(
'acceptance',
'%10G',
None))
994 stat.add_entry(hmc_key, entry=Entry(
'n_md_steps',
'%10G',
None))
995 stat.add_entry(hmc_key, entry=Entry(
'E_kinetic',
'%10G',
None))
997 stat.add_coordinates(hmc_key, coord)
999 stat.add_entry(hmc_key, name=
'counter')
1003 """rescale the velocities of a bunch of particles having vx vy and vz
1009 p.set_value(k, p.get_value(k)*factor)