IMP logo
IMP Reference Guide  2.5.0
The Integrative Modeling Platform
shared_functions.py
1 #!/usr/bin/env python
2 from __future__ import print_function
3 import sys
4 import os
5 from glob import glob
6 
7 import IMP
8 import IMP.atom
9 import IMP.container
10 import IMP.saxs
11 import IMP.core
12 import IMP.isd
13 import IMP.isd.TBLReader
15 import IMP.isd.utils
16 from IMP.isd.Entry import Entry
17 
18 from math import sqrt, exp
19 from io import StringIO
20 
21 from random import random
22 
23 kB = (1.381 * 6.02214) / 4184.0
24 
25 
26 class PyMDMover:
27 
28  def __init__(self, cont, md, n_md_steps):
29  self.cont = cont
30  self.md = md
31  self.n_md_steps = n_md_steps
32  self.velkeys = [IMP.FloatKey("vx"),
33  IMP.FloatKey("vy"), IMP.FloatKey("vz")]
34 
35  def store_move(self):
36  self.oldcoords = []
37  # self.oldvel=[]
38  for i in range(self.cont.get_number_of_particles()):
39  p = self.cont.get_particle(i)
40  #self.oldvel.append([p.get_value(k) for k in self.velkeys])
41  d = IMP.core.XYZ(p)
42  self.oldcoords.append(d.get_coordinates())
43 
44  def propose_move(self, prob):
45  self.md.optimize(self.n_md_steps)
46 
47  def reset_move(self):
48  for i in range(self.cont.get_number_of_particles()):
49  p = self.cont.get_particle(i)
50  # for j,k in enumerate(self.velkeys):
51  # p.set_value(k, self.oldvel[i][j])
52  d = IMP.core.XYZ(p)
53  d.set_coordinates(self.oldcoords[i])
54 
55  def get_number_of_steps(self):
56  return self.n_md_steps
57 
58  def set_number_of_steps(self, nsteps):
59  self.n_md_steps = nsteps
60 
61 
62 class PyMC(IMP.Optimizer):
63  debug = True
64 
65  def __init__(self, model):
66  self.m = model
67 
68  def add_mover(self, mv):
69  self.mv = mv
70 
71  def set_kt(self, kT):
72  self.kT = kT
73 
74  def set_return_best(self, thing):
75  pass
76 
77  def set_move_probability(self, thing):
78  pass
79 
80  def get_energy(self):
81  pot = self.m.evaluate(False)
82  # pot=self.get_restraints().evaluate(False)
83  kin = self.mv.md.get_kinetic_energy()
84  return pot + kin
85 
86  def metropolis(self, old, new):
87  deltaE = new - old
88  if self.debug:
89  print(": old %f new %f deltaE %f new_epot: %f" % (old, new, deltaE,
90  self.m.evaluate(
91  False)), end=' ')
92  kT = self.kT
93  if deltaE < 0:
94  return True
95  else:
96  return exp(-deltaE / kT) > random()
97 
98  def optimize(self, nsteps):
99  self.naccept = 0
100  print("=== new MC call")
101  # store initial coordinates
102  self.mv.store_move()
103  for i in range(nsteps):
104  print("MC step %d " % i, end=' ')
105  # draw new velocities
106  self.mv.md.assign_velocities(self.kT / kB)
107  # get total energy
108  old = self.get_energy()
109  # make a MD move
110  self.mv.propose_move(1)
111  # get new total energy
112  new = self.get_energy()
113  if self.metropolis(old, new):
114  # move was accepted: store new conformation
115  self.mv.store_move()
116  self.naccept += 1
117  print("accepted ")
118  else:
119  # move rejected: restore old conformation
120  self.mv.reset_move()
121  print(" ")
122 
123  def get_number_of_forward_steps(self):
124  return self.naccept
125 
126 
128 
129  """nonspecific methods used across all shared function objects.
130  Rules:
131  - Their name starts with the name of the parent function (e.g.
132  init_model_* )
133  - they don't store anything in the class, but instead
134  return all created objects.
135  Exceptions: the model, which is self._m
136  the statistics class, which is self.stat
137  - they store what they have to store in the model (e.g. restraints)
138  - they don't print out anything except for long routines (e.g. NOE
139  parsing)
140  - the prior RestraintSet is added to the model when it is created, so
141  that when it is passed to another function, it is not added twice.
142  """
143 
144  def hello(self):
145  return "hello world"
146 
147  def set_checklevel(self, value):
148  IMP.set_check_level(value)
149 
150  def set_loglevel(self, value):
151  IMP.set_log_level(value)
152 
153  def m(self, name, *args, **kw):
154  "wrapper to call methods of m"
155  func = getattr(self._m, name)
156  return func(*args, **kw)
157 
158  def init_model_base(self, wd):
159  "moves to wd and creates model"
160  os.chdir(wd)
161  # Create an IMP model
162  self._m = IMP.Model()
163 
165  self, initpdb, top, par, selector, pairscore,
166  ff_temp=300.0, disulfides=None, representation='custom'):
167  """creates a CHARMM protein representation.
168  creates the charmm force field, bonded and nonbonded.
169  - initpdb: initial structure in pdb format
170  - top is a CHARMM top.lib, read if representation=='custom' (default)
171  - par is a CHARMM par.lib
172  - selector is an instance of
173  one of the selectors of IMP.atom, for example
174  IMP.atom.NonWaterNonHydrogenPDBSelector().
175  - pairscore is an instance of a Pair Score to score the interaction
176  between two atoms. usually, it's either
177  LennardJonesDistancePairScore(0,1) or
178  RepulsiveDistancePairScore(0,1)
179  - ff_temp is the temperature at which the force field should be
180  simulated.
181  - disulfides: if not None, a list of tuples corresponding to residue
182  numbers that should be cross-linked. Residues should be
183  cysteines, and residue numbering should start at 0.
184  - representation: 'full' : all-atom CHARMM force field
185  'heavy': heavy atom CHARMM forcefield with polar H
186  'calpha': only C alphas, ball and stick model with
187  bondlength 3.78 angstrom, beads at VdW
188  contact, and harmonic restraint between
189  them.
190  'custom' (default) : read given CHARMM top and par
191  files.
192  Returns: prot, ff, rsb, rs
193  - prot: the protein
194  - ff: the force field
195  - rsb: the RestraintSet on bonded interactions
196  - rs: the RestraintSet on nonbonded interactions. Both are weighted
197  using ff_temp.
198  """
199  m = self._m
200  prot = IMP.atom.read_pdb(initpdb, m, selector)
201  if not prot.get_is_valid(True):
202  raise ValueError("invalid hierarchy!")
203  if representation == 'custom':
204  # Read in the CHARMM heavy atom topology and parameter files
205  ff = IMP.atom.CHARMMParameters(top, par)
206  elif representation == 'heavy':
208  elif representation == 'full':
210  elif representation == 'calpha':
211  pass
212  else:
213  raise NotImplementedError(representation)
214  if representation == 'calpha':
215  print("setting up simplified C alpha force field")
216  # set radii
217  for ca in IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE):
218  IMP.core.XYZR(ca.get_particle()).set_radius(1.89)
219  # IMP.atom.Mass(ca.get_particle()).set_mass(1.)
220  # create bonds by getting pairs and applying a pairscore
221  pairs = []
222  for chain in prot.get_children():
223  residues = [(chain.get_child(i), chain.get_child(i + 1))
224  for i in range(chain.get_number_of_children() - 1)]
225  residues = [(i.get_child(0).get_particle(),
226  j.get_child(0).get_particle())
227  for (i, j) in residues]
228  pairs.extend(residues)
229  bonds = []
230  for (i, j) in pairs:
231  # check, because it fails if you try twice
233  bi = IMP.atom.Bonded(i)
234  else:
237  bi = IMP.atom.Bonded(i)
238  else:
240  bond = IMP.atom.create_custom_bond(bi, bj, 3.78, 10) # stiff
241  bonds.append(bond)
242  bonds_container = IMP.container.ListSingletonContainer(self._m,
243  IMP.get_indexes(bonds))
244  hdps = IMP.core.Harmonic(0, 1)
245  bs = IMP.atom.BondSingletonScore(hdps)
246  br = IMP.container.SingletonsRestraint(bs, bonds_container)
247  rsb = IMP.RestraintSet("bonded")
248  rsb.add_restraint(br)
249  rsb.set_weight(1.0 / (kB * ff_temp))
250  nonbonded_pair_filter = IMP.atom.StereochemistryPairFilter()
251  nonbonded_pair_filter.set_bonds(bonds)
252  ff = None
253  else:
254  print("setting up CHARMM forcefield")
255  #
256  # Using the CHARMM libraries, determine the ideal topology (atoms and their
257  # connectivity) for the PDB file's primary sequence
258  topology = ff.create_topology(prot)
259  # Typically this modifies the C and N termini of each chain in the protein by
260  # applying the CHARMM CTER and NTER patches. Patches can also be manually
261  # applied at this point, e.g. to add disulfide bridges.
262  topology.apply_default_patches()
263  # disulfides
264  if disulfides:
265  s = topology.get_segment(0)
266  dis = ff.get_patch('DISU')
267  for (i, j) in disulfides:
268  self.find_atom((i, 'SG'), prot)
269  self.find_atom((j, 'SG'), prot)
270  r0 = s.get_residue(i)
271  r1 = s.get_residue(j)
272  if i == 0:
273  r0.set_patched(False)
274  if j == 0:
275  r1.set_patched(False)
276  dis.apply(r0, r1)
277  print("added disulfide bridge between cysteines %d and %d" % (i, j))
278  # Make the PDB file conform with the topology; i.e. if it contains extra
279  # atoms that are not in the CHARMM topology file, remove them; if it is
280  # missing atoms (e.g. sidechains, hydrogens) that are in the CHARMM topology,
281  # add them and construct their Cartesian coordinates from internal coordinate
282  # information.
283  topology.setup_hierarchy(prot)
284  # Set up and evaluate the stereochemical part (bonds, angles, dihedrals,
285  # impropers) of the CHARMM forcefield
286  r = IMP.atom.CHARMMStereochemistryRestraint(prot, topology)
287  rsb = IMP.RestraintSet(m, 1.0, "bonded")
288  rsb.add_restraint(r)
289  #
290  # Add non-bonded interaction (in this case, Lennard-Jones). This needs to
291  # know the radii and well depths for each atom, so add them from the forcefield
292  # (they can also be assigned manually using the XYZR or LennardJones
293  # decorators):
294  ff.add_radii(prot)
295  ff.add_well_depths(prot)
296  nonbonded_pair_filter = r.get_pair_filter()
297  # Get a list of all atoms in the protein, and put it in a container
298  atoms = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
300  IMP.get_indexes(atoms))
301  # Add a restraint for the Lennard-Jones interaction. This is built from
302  # a collection of building blocks. First, a ClosePairContainer maintains a list
303  # of all pairs of Particles that are close. Next, all 1-2, 1-3 and 1-4 pairs
304  # from the stereochemistry created above are filtered out.
305  # Then, a LennardJonesPairScore scores a pair of atoms with the Lennard-Jones
306  # potential. Finally, a PairsRestraint is used which simply applies the
307  # LennardJonesPairScore to each pair in the ClosePairContainer.
308  nbl = IMP.container.ClosePairContainer(cont, 4.0)
309  nbl.add_pair_filter(nonbonded_pair_filter)
310  # should weight the ff restraint by a temperature, set to 300K.
311  pr = IMP.container.PairsRestraint(pairscore, nbl)
312  rs = IMP.RestraintSet(m, 1.0 / (kB * ff_temp), 'phys')
313  rs.add_restraint(pr)
314  return prot, ff, rsb, rs
315 
316  def init_model_setup_scale(self, default, lower=None, upper=None):
317  """sets up a Scale particle to the initial default value. It can
318  optionnally be constrained between two positive bounds, or else its
319  range is 0 to infinity.
320  """
321  m = self._m
322  scale = IMP.isd.Scale.setup_particle(IMP.Particle(m), default)
323  if lower:
324  scale.set_lower(lower)
325  if upper:
326  scale.set_upper(upper)
327  return scale
328 
329  def init_model_jeffreys_kappa(self, scales, prior_rs=None):
330  """given a list of scales, returns a RestraintSet('prior') with weight
331  1.0 that contains a list of vonMisesKappaJeffreysRestraint on each scale.
332  If argument prior_rs is used, add them to that RestraintSet instead.
333  """
334  if not prior_rs:
335  prior_rs = IMP.RestraintSet('prior')
336  prior_rs.set_weight(1.0)
337  for i in scales:
338  prior_rs.add_restraint(IMP.isd.vonMisesKappaJeffreysRestraint(i))
339  return prior_rs
340 
341  def init_model_jeffreys(self, scales, prior_rs=None):
342  """given a list of scales, returns a RestraintSet('prior') with weight
343  1.0 that contains a list of JeffreysRestraint on each scale.
344  If argument prior_rs is used, add them to that RestraintSet instead.
345  """
346  if not prior_rs:
347  prior_rs = IMP.RestraintSet(self._m, 1.0, 'prior')
348  for i in scales:
349  jr = IMP.isd.JeffreysRestraint(self._m, i)
350  prior_rs.add_restraint(jr)
351  return prior_rs
352 
353  def init_model_conjugate_kappa(self, scales, c, R, prior_rs=None):
354  """given a list of scales, returns a RestraintSet('prior') with weight
355  1.0 that contains a list of vonMisesKappaConjugateRestraint on each
356  scale. If argument prior_rs is used, add them to that RestraintSet
357  instead.
358  """
359  if not (0 <= R <= c):
360  raise ValueError("parameters R and c should satisfy 0 <= R <= c")
361  if not prior_rs:
362  prior_rs = IMP.RestraintSet(self._m, 1.0, 'prior')
363  for i in scales:
364  prior_rs.add_restraint(
366  return prior_rs
367 
368  def find_atom(self, atom, prot):
369  """scans the prot hierarchy and tries to find atom = (resno, name)
370  assumes that resno follows the same numbering as the sequence.
371  Stores already found atoms for increased speed.
372  """
373  if not hasattr(self, '__memoized'):
374  self.__memoized = {prot: {}}
375  try:
376  return self.__memoized[prot][atom]
377  except:
378  pass
379  try:
380  sel = IMP.atom.Selection(hierarchy=prot,
381  residue_index=atom[0],
382  atom_type=IMP.atom.AtomType(atom[1])
383  ).get_selected_particles()
384  if len(sel) > 1:
385  print("found multiple atoms for atom %d %s!" % atom)
386  return
387  p0 = IMP.core.XYZ(sel[0])
388  except:
389  print("atom %d %s not found" % atom)
390  return
391  self.__memoized[prot][atom] = p0
392  return p0
393 
394  def init_model_vonMises_restraint_full(self, atoms, data, kappa):
395  """
396  Sets up a vonMises torsion angle restraint using the given kappa
397  particle as concentration parameter. Returns the restraint.
398  data is a list of observations.
399  """
400  return IMP.isd.TALOSRestraint(atoms, data, kappa)
401 
402  def init_model_vonMises_restraint_mean(self, prot, atoms, data, kappa):
403  """
404  Sets up a vonMises torsion angle restraint using the given kappa
405  particle as concentration parameter. Returns the restraint.
406  data is (mean, standard deviation).
407  """
408  raise NotImplementedError
409 
410  def init_model_NOE_restraint(self, prot, atoms, distance, sigma, gamma):
411  """
412  Sets up a lognormal distance restraint using the given sigma and gamma.
413  Returns the restraint.
414  assumes atoms = (atom1, atom2)
415  where atom1 is (resno, atomname) and resno is the residue sequence
416  number.
417  """
418  # find corresponding atoms
419  p0 = self.find_atom(atoms[0], prot)
420  p1 = self.find_atom(atoms[1], prot)
421  # create lognormal restraint using gamma_data = 1
423  self._m,
424  p0,
425  p1,
426  sigma,
427  gamma,
428  distance ** (-6))
429  return ln
430 
431  def init_model_ambiguous_NOE_restraint(self, prot, contributions, distance,
432  sigma, gamma):
433  """Reads an ambiguous NOE restraint. contributions is a list of
434  (atom1, atom2) pairs, where atom1 is (resno, atomname). Sets up a
435  lognormal distance restraint using the given sigma and gamma.
436  Returns the restraint.
437  """
438  # create pairs list
439  pairs = [(self.find_atom(i, prot).get_particle(),
440  self.find_atom(j, prot).get_particle()) for (i, j) in
441  contributions]
442  pairs = IMP.container.ListPairContainer(pairs)
443  # create restraint
445  pairs, sigma, gamma, distance ** (-6))
446  return ln
447 
449  self, prot, seqfile, tblfile, name='NOE', prior_rs=None,
450  bounds_sigma=(1.0, 0.1, 100), bounds_gamma=(1.0, 0.1, 100),
451  verbose=True, sequence_match=(1, 1)):
452  """read TBL file and store NOE restraints, using one sigma and one gamma
453  for the whole dataset. Creates the necessary uninformative priors.
454  - prot: protein hierarchy
455  - seqfile: a file with 3-letter sequence
456  - tblfile: a TBL file with the restraints
457  - name: an optional name for the restraintset
458  - prior_rs: when not None, add new sigma and gamma to this
459  RestraintSet instance.
460  - bounds_sigma or gamma: tuple of (initial value, lower, upper bound)
461  bounds can be -1 to set to default range [0,+inf]
462  - verbose: be verbose (default True)
463  - sequence_match : (noe_start, sequence_start)
464  Returns: data_rs, prior_rs, sigma, gamma
465  """
466  # prior
467  if verbose:
468  print("Prior for the NOE Scales")
469  sigma = self.init_model_setup_scale(*bounds_sigma)
470  gamma = self.init_model_setup_scale(*bounds_gamma)
471  prior_rs = self.init_model_jeffreys([sigma, gamma], prior_rs)
472  # likelihood
473  rs = IMP.RestraintSet(self._m, 1.0, name)
474  # use the TBLReader to parse the TBL file.
475  sequence = IMP.isd.utils.read_sequence_file(seqfile,
476  first_residue_number=sequence_match[1])
477  tblr = IMP.isd.TBLReader.TBLReader(sequence,
478  sequence_match=sequence_match)
479  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
480  for i, restraint in enumerate(restraints):
481  if verbose and i % 100 == 0:
482  print("\r%d" % i, end=' ')
483  sys.stdout.flush()
484  # a restraint is (contributions, dist, upper, lower, volume)
485  # where contributions is a tuple of contributing pairs
486  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
487  if len(restraint[0]) > 1:
489  prot, restraint[0],
490  restraint[1], sigma, gamma)
491  else:
492  ln = self.init_model_NOE_restraint(prot, restraint[0][0],
493  restraint[1], sigma, gamma)
494  rs.add_restraint(ln)
495  if verbose:
496  print("\r%d NOE restraints read" % i)
497  # set weight of rs.
498  # Weight is 1.0 cause sigma particle already has this role.
499  return rs, prior_rs, sigma, gamma
500 
501  def init_model_NOEs_marginal(self, prot, seqfile, tblfile, name='NOE',
502  verbose=True, sequence_match=(1, 1)):
503  """read TBL file and store NOE restraints, using the marginal of the
504  lognormal with one sigma and one gamma, for the whole dataset.
505  - prot: protein hierarchy
506  - seqfile: a file with 3-letter sequence
507  - tblfile: a TBL file with the restraints
508  - name: an optional name for the restraintset
509  - verbose: be verbose (default True)
510  - sequence_match : (noe_start, sequence_start)
511  Returns: data_rs
512  """
513  # likelihood
514  rs = IMP.RestraintSet(self._m, 1.0, name)
515  # use the TBLReader to parse the TBL file.
516  sequence = IMP.isd.utils.read_sequence_file(seqfile,
517  first_residue_number=sequence_match[1])
518  tblr = IMP.isd.TBLReader.TBLReader(sequence,
519  sequence_match=sequence_match)
520  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
522  for i, restraint in enumerate(restraints):
523  if verbose and i % 100 == 0:
524  print("\r%d" % i, end=' ')
525  sys.stdout.flush()
526  # a restraint is (contributions, dist, upper, lower, volume)
527  # where contributions is a tuple of contributing pairs
528  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
529  # residue numbers start at 0
530  pairs = [(self.find_atom(i, prot).get_particle(),
531  self.find_atom(j, prot).get_particle()) for (i, j) in
532  restraint[0]]
533  pairs = IMP.container.ListPairContainer(pairs)
534  ln.add_contribution(pairs, restraint[4])
535  rs.add_restraint(ln)
536  if verbose:
537  print("\r%d NOE contributions added" % (len(restraints)))
538  return rs
539 
540  def init_model_HBonds_marginal(self, prot, seqfile, tblfile, name='NOE',
541  verbose=True):
542  """read TBL file and store lognormal restraints, using the marginal of the
543  lognormal with one sigma and gamma=1, for the whole dataset.
544  - prot: protein hierarchy
545  - seqfile: a file with 3-letter sequence
546  - tblfile: a TBL file with the restraints
547  - name: an optional name for the restraintset
548  - verbose: be verbose (default True)
549  Returns: data_rs
550  """
551  # likelihood
552  rs = IMP.RestraintSet(self._m, 1.0, name)
553  # use the TBLReader to parse the TBL file.
554  sequence = IMP.isd.utils.read_sequence_file(seqfile)
555  tblr = IMP.isd.TBLReader.TBLReader(sequence)
556  restraints = tblr.read_distances(tblfile, 'HBond')['HBond']
558  for i, restraint in enumerate(restraints):
559  if verbose and i % 100 == 0:
560  print("\r%d" % i, end=' ')
561  sys.stdout.flush()
562  # a restraint is (contributions, dist, upper, lower, volume)
563  # where contributions is a tuple of contributing pairs
564  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
565  # residue numbers start at 0
566  pairs = [(self.find_atom(i, prot).get_particle(),
567  self.find_atom(j, prot).get_particle()) for (i, j) in
568  restraint[0]]
569  pairs = IMP.container.ListPairContainer(pairs)
570  ln.add_contribution(pairs, restraint[4])
571  rs.add_restraint(ln)
572  if verbose:
573  print("\r%d Hbond contributions added" % (len(restraints)))
574  return rs
575 
576  def init_model_TALOS(self, prot, seqfile, talos_data, fulldata=True,
577  sequence_match=(1, 1), name='TALOS', prior_rs=None,
578  bounds_kappa=(1.0, 0.1, 10), verbose=True, prior='jeffreys',
579  keep_all=False):
580  """read TALOS dihedral angle data, and create restraints for phi/psi
581  torsion angles, along with the prior for kappa, which is a scale for the
582  whole dataset, compare to 1/sigma**2 in the NOE case.
583  - prot: protein hierarchy
584  - seqfile: a file with 3-letter sequence
585  - talos_data: either a file (pred.tab), or a folder (pred/) in which all
586  files in the form res???.tab can be found. If possible,
587  try to provide the folder, as statistics are more
588  accurate in that case.
589  - fulldata : either True or False, whether the data is the full TALOS
590  output (predAll.tab or pred/ folder), or just the averages
591  (pred.tab)
592  - sequence_match : (talos_no, sequence_no) to adjust for different
593  residue numberings
594  - name: an optional name for the restraintset
595  - prior_rs: when not None, add new kappa(s) to this RestraintSet instance.
596  - bounds_kappa: tuple of (initial value, lower, upper bound)
597  bounds can be -1 to set to default range [0,+inf]
598  - verbose: be verbose (default True)
599  - prior: either 'jeffreys' or a tuple (R,c), which signifies to use the
600  conjugate prior of the von Mises restraint, with parameters R
601  and c. Good values are R=0 and c=10. Default: jeffreys prior.
602  - keep_all: in case of a folder for 'talos_data', whether to keep
603  candidates marked as 'outliers' by TALOS, or to include them.
604  Returns: data_rs, prior_rs, kappa
605 
606  """
607  # prior
608  if verbose:
609  print("Prior for von Mises Kappa")
610  kappa = self.init_model_setup_scale(*bounds_kappa)
611  prior_rs = self.init_model_jeffreys_kappa([kappa], prior_rs)
612  # likelihood
613  if verbose:
614  print("reading data")
615  rs = IMP.RestraintSet(self._m, 1.0, name)
616  sequence = IMP.isd.utils.read_sequence_file(seqfile,
617  first_residue_no=sequence_match[1])
618  if fulldata:
619  talosr = IMP.isd.TALOSReader.TALOSReader(sequence, True, keep_all,
620  sequence_match=sequence_match)
621  if os.path.isdir(talos_data):
622  # using pred/res???.tab files
623  for i, res in enumerate(glob(os.path.join(talos_data, 'res???.tab'))):
624  if verbose and i % 100:
625  print("\r%d" % i, end=' ')
626  sys.stdout.flush()
627  talosr.read(res)
628  else:
629  # using predAll.tab file
630  talosr.read(talos_data)
631  else:
632  # using pred.tab file and correcting for estimates
633  talosr = IMP.isd.TALOSReader.TALOSReader(sequence, False, keep_all,
634  sequence_match=sequence_match)
635  talosr.read(talos_data)
636  # get harvested data and create restraints
637  data = talosr.get_data()
638  if verbose:
639  print("\rread dihedral data for %d residues" % len(data))
640  print("creating restraints")
641  avgR = []
642  for resno, datum in data.items():
643  phidata = datum['phi']
644  psidata = datum['psi']
645  num = datum['num']
646  res = IMP.atom.Residue(IMP.atom.get_residue(prot, resno))
649  if fulldata:
651  phi, phidata, kappa)
652  rs.add_restraint(r)
653  if verbose:
654  avgR.append(r.get_R0())
656  psi, psidata, kappa)
657  rs.add_restraint(r)
658  if verbose:
659  avgR.append(r.get_R0())
660  else:
662  phi, phidata, kappa)
663  rs.add_restraint(r)
664  if verbose:
665  avgR.append(r.get_R0())
667  psi, psidata, kappa)
668  rs.add_restraint(r)
669  if verbose:
670  avgR.append(r.get_R0())
671  if verbose:
672  print("%s Restraints created. Average R0: %f" % \
673  (len(avgR), sum(avgR) / float(len(avgR))))
674  return rs, prior_rs, kappa
675 
677  self, prot, profilefile, name='SAXS',
678  ff_type=IMP.saxs.HEAVY_ATOMS):
679  """read experimental SAXS profile and apply restraint the standard
680  way (like foxs)
681  Returns: a restraintset and the experimental profile
682  """
683  rs = IMP.RestraintSet(self._m, 1.0, name)
684  saxs_profile = IMP.saxs.Profile(profilefile)
685  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
686  saxs_restraint = IMP.saxs.Restraint(particles, saxs_profile, ff_type)
687  rs.add_restraint(saxs_restraint)
688  return rs, saxs_profile
689 
690  def _setup_md(self, prot, temperature=300.0, thermostat='berendsen',
691  coupling=500, md_restraints=None, timestep=1.0, recenter=1000,
692  momentum=1000):
693  """setup molecular dynamics
694  - temperature: target temperature
695  - thermostat: one of 'NVE', rescale_velocities',
696  'berendsen', 'langevin'
697  - coupling: coupling constant (tau (fs) for berendsen,
698  gamma (/fs) for langevin)
699  - md_restraints: if not None, specify the terms of the energy to be used
700  during the md steps via a list of restraints.
701  - timestep: in femtoseconds.
702  - recenter: recenter the molecule every so many steps (Langevin only)
703  - momentum: remove angular momentum every so many steps (Berendsen only)
704  Returns: an instance of md and an instance of an OptimizerState (the
705  thermostat), or None if NVE.
706  """
707  # Molecular Dynamics (from MAX BONOMI)
708  md = IMP.atom.MolecularDynamics(self._m)
709  md.assign_velocities(temperature)
710  md.set_time_step(timestep)
711  if thermostat == 'NVE':
712  os = None
713  elif thermostat == 'rescale_velocities':
715  IMP.atom.get_leaves(prot), temperature)
716  md.add_optimizer_state(os)
717  elif thermostat == 'berendsen':
719  IMP.atom.get_leaves(prot), temperature, coupling)
720  md.add_optimizer_state(os)
722  IMP.atom.get_leaves(prot), momentum)
723  md.add_optimizer_state(mom)
724  elif thermostat == 'langevin':
726  IMP.atom.get_leaves(prot), temperature, coupling)
727  md.add_optimizer_state(os)
728  # cen = IMP.atom.RemoveTranslationOptimizerState(
729  # IMP.atom.get_leaves(prot), recenter)
730  # md.add_optimizer_state(cen)
731  else:
732  raise NotImplementedError(thermostat)
733 
734  if md_restraints:
735  md.set_restraints(md_restraints)
736 
737  return md, os
738 
739  def _setup_normal_mover(self, particle, floatkey, stepsize):
740  """setup NormalMover to move particle's floatkey attribute
741  by a gaussian with standard deviation 'stepsize'
742  Returns: mover instance.
743  """
744  nm = IMP.core.NormalMover(self._m, particle.get_particle_index(),
745  IMP.FloatKeys([floatkey]), stepsize)
746  return nm
747 
748  def _setup_md_mover(self, md, particles, temperature, n_md_steps=10):
749  """setup MDMover using md and particles.
750  - md: md instance
751  - particles: particles to move, usually the leaves of the protein
752  hierarchy
753  - n_md_steps: number of md steps to perform on each move
754  Returns: mover instance.
755  """
757  cont.add(IMP.get_indexes(particles))
758  return IMP.atom.MDMover(cont, md, temperature, n_md_steps)
759 
760  def _setup_mc(self, mover, temperature=300.0, mc_restraints=None):
761  """setup monte carlo using a certain mover.
762  - mover: mover to use, NormalMover or MDMover usually.
763  - temperature: target temperature.
764  - mc_restraints: if not None, list of restraints for the metropolis
765  criterion.
766  Returns: mc instance.
767  """
768  mc = IMP.core.MonteCarlo(self._m)
769  # why is this returning an int?
770  mc.add_mover(mover)
771  # set same temp as MD, careful with units
772  mc.set_kt(kB * temperature)
773  # allow to go uphill
774  mc.set_return_best(False)
775  if mc_restraints:
776  # tell mc to only use restraints involving the particle
777  mc.set_restraints(mc_restraints)
778  return mc
779 
780  def init_simulation_setup_protein_hmc_hopper(self, prot, temperature=300.0,
781  gamma=0.01, n_md_steps=10,
782  md_restraints=None, mc_restraints=None, timestep=1.0,
783  sd_threshold=0.0, sd_stepsize=0.01, sd_maxsteps=100):
784  """setup hybrid monte-carlo on protein. Uses basin hopping with steepest
785  descent minimization.
786  - prot: protein hierarchy.
787  - temperature: target temperature.
788  - gamma: coupling constant for langevin (/fs)
789  - n_md_steps: number of md steps per mc step
790  - md_restraints: if not None, specify the terms of the energy to be used
791  during the md steps.
792  - mc_restraints: if not None, use these energy terms for the metropolis
793  criterion.
794  - timestep: time step for md, in femtoseconds.
795  - sd_threshold: stop steepest descent after energy difference drops
796  below that threshold
797  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
798  - sd_maxsteps: maximum number of steps for steepest descent
799  Returns: hmc, mdmover, md and OptimizerState (thermostat)
800  """
801  raise NotImplementedError
802  md, os = self._setup_md(prot, temperature=temperature,
803  thermostat='langevin', coupling=gamma,
804  md_restraints=md_restraints, timestep=timestep)
805  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
806  mdmover = self._setup_md_mover(md, particles, temperature, n_md_steps)
807  hmc = self._setup_mc(mdmover, temperature, mc_restraints)
808  sd = IMP.core.SteepestDescent(self._m)
809  sd.set_threshold(sd_threshold)
810  sd.set_step_size(sd_stepsize)
811  hmc.set_local_optimizer(sd)
812  hmc.set_local_steps(sd_maxsteps)
813  hmc.set_use_basin_hopping(True)
814  return hmc, mdmover, md, os
815 
816  def init_simulation_setup_protein_hmc_nve(self, prot, temperature=300.0,
817  n_md_steps=100, md_restraints=None, mc_restraints=None,
818  timestep=1.0):
819  """setup hybrid monte-carlo on protein. Uses NVE MD and tries the full
820  - prot: protein hierarchy.
821  - temperature: target temperature.
822  - coupling: coupling constant (tau (fs) for berendsen,
823  gamma (/fs) for langevin)
824  - n_md_steps: number of md steps per mc step
825  - md_restraints: if not None, specify the terms of the energy to be used
826  during the md steps.
827  - mc_restraints: if not None, use these energy terms for the metropolis
828  criterion.
829  - timestep: time step for md, in femtoseconds.
830  - sd_threshold: stop steepest descent after energy difference drops
831  below that threshold
832  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
833  - sd_maxsteps: maximum number of steps for steepest descent
834  Returns: hmc, mdmover and md
835  """
836  prot = self._p['prot']
837  md, os = self._setup_md(prot, temperature=temperature,
838  thermostat='NVE', coupling=None, timestep=1.0,
839  md_restraints=md_restraints)
840  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
842  cont.add(IMP.get_indexes(particles))
843  mdmover = PyMDMover(cont, md, n_md_steps)
844  mdmover.m = self._m
845  #mdmover = IMP.atom.MDMover(cont, md, temperature, n_md_steps)
846  mc = PyMC(self._m)
847  #mc = IMP.core.MonteCarlo(self._m)
848  # why is this returning an int?
849  mc.add_mover(mdmover)
850  # set same temp as MD, careful with units
851  mc.set_kt(kB * temperature)
852  # allow to go uphill
853  mc.set_return_best(False)
854  # update all particles each time
855  mc.set_move_probability(1.0)
856  return mc, mdmover, md
857 
858  def init_simulation_setup_nuisance_mc(self, nuis, temperature=300.0,
859  mc_restraints=None, nm_stepsize=0.1):
860  """sets up monte carlo on nuisance, at a certain target temperature,
861  optionnally using a certain set of restraints only.
862  - nuis: nuisance particle
863  - temperature: target temperature
864  - mc_restraints: optional set of restraints from which the energy should
865  be drawn instead of the energy of the complete system.
866  - floatkey: the floatkey to move.
867  - nm_stepsize: the stepsize of the normal mover
868  Returns: mc instance, nm instance.
869  """
870  nm = self._setup_normal_mover(nuis, nuis.get_nuisance_key(),
871  nm_stepsize)
872  mc = self._setup_mc(nm, temperature, mc_restraints)
873  return mc, nm
874 
875  def _mc_and_update_nm(self, nsteps, mc, nm, stats_key,
876  adjust_stepsize=True):
877  """run mc using a normal mover on a single particle,
878  update stepsize and store nsteps, acceptance and stepsize in the
879  statistics instance self.stat by using the key stats_key.
880  """
881  # do the monte carlo
882  mc.optimize(nsteps)
883  naccept = mc.get_number_of_forward_steps()
884  # increment the counter for this simulation
885  self.stat.increment_counter(stats_key, nsteps)
886  # get the acceptance rate, stepsize
887  accept = float(naccept) / nsteps
888  self.stat.update(stats_key, 'acceptance', accept)
889  stepsize = nm.get_sigma()
890  self.stat.update(stats_key, 'stepsize', stepsize)
891  # update stepsize if needed and requested
892  if adjust_stepsize:
893  if 0.4 < accept < 0.6:
894  return
895  if accept < 0.05:
896  accept = 0.05
897  if accept > 1.0:
898  accept = 1.0
899  nm.set_sigma(stepsize * 2 * accept)
900 
901  def _hmc_and_update_md(self, nsteps, hmc, mv, stats_key,
902  adjust_stepsize=True):
903  """run hmc, update stepsize and print statistics. Updates number of MD
904  steps to reach a target acceptance between 0.4 and 0.6, sends
905  statistics to self.stat. MD steps are always at least 10 and at most 500.
906  """
907  hmc.optimize(nsteps)
908  naccept = hmc.get_number_of_forward_steps()
909  self.stat.increment_counter(stats_key, nsteps)
910  accept = float(naccept) / nsteps
911  self.stat.update(stats_key, 'acceptance', accept)
912  mdsteps = mv.get_number_of_steps()
913  self.stat.update(stats_key, 'n_md_steps', mdsteps)
914  if adjust_stepsize:
915  if 0.4 < accept < 0.6:
916  return
917  mdsteps = int(mdsteps * 2 ** (accept - 0.5))
918  if mdsteps > 500:
919  mdsteps = 500
920  if mdsteps < 10:
921  mdsteps = 10
922  mv.set_nsteps(mdsteps)
923 
924  def get_pdb(self, prot):
925  """returns a string corresponding to the pdb structure of hierarchy
926  prot.
927  """
928  output = StringIO()
929  IMP.atom.write_pdb(prot, output)
930  return output.getvalue()
931 
932  def get_netcdf(self, prot):
933  raise NotImplementedError
934 
935  def do_md_protein_statistics(self, md_key, nsteps, md_instance,
936  temperature=300.0, prot_coordinates=None):
937  """updates statistics for md simulation: target temp, kinetic energy,
938  kinetic temperature, writes coordinates and increments counter.
939  - md_key: stats md key
940  - nsteps: number of steps performed.
941  - md_instance: instance of the MolecularDynamics class.
942  - temperature: target temperature
943  - prot_coordinates: protein coordinates to be passed to the stats class,
944  (should be a string)
945  """
946  self.stat.update(md_key, 'target_temp', temperature)
947  kinetic = md_instance.get_kinetic_energy()
948  self.stat.update(md_key, 'E_kinetic', kinetic)
949  self.stat.update(md_key, 'instant_temp',
950  md_instance.get_kinetic_temperature(kinetic))
951  self.stat.update_coordinates(md_key, 'protein', prot_coordinates)
952  self.stat.increment_counter(md_key, nsteps)
953 
954  def init_stats_add_mc_category(self, stat, name='mc', coord='particle'):
955  """shortcut for a frequent series of operations on MC simulations'
956  statistics. Creates an entry for acceptance, stepsize and one
957  coordinate set printed in the statistics file.
958  """
959  # create category
960  mc_key = stat.add_category(name=name)
961  # giving None as argument is a way to create a static entry.
962  stat.add_entry(mc_key, entry=Entry('temperature', '%10G', None))
963  stat.add_entry(mc_key, entry=Entry('acceptance', '%10G', None))
964  stat.add_entry(mc_key, entry=Entry('stepsize', '%10G', None))
965  # special call to add coordinates to be dumped
966  stat.add_entry(mc_key, entry=Entry(coord, '%10G', None))
967  # add the counter to the output
968  stat.add_entry(mc_key, name='counter')
969  return mc_key
970 
971  def init_stats_add_md_category(self, stat, name='md', coord='protein'):
972  """shortcut for a frequent series of operations on MD simulations'
973  statistics. Creates an entry for target temp, instantaneous temp,
974  kinetic energy, and one set of coordinates called 'protein' by
975  default.
976  """
977  # create category
978  md_key = stat.add_category(name=name)
979  # giving None as argument is a way to create a static entry.
980  stat.add_entry(md_key, entry=Entry('target_temp', '%10G', None))
981  stat.add_entry(md_key, entry=Entry('instant_temp', '%10G', None))
982  stat.add_entry(md_key, entry=Entry('E_kinetic', '%10G', None))
983  # special call to add coordinates to be dumped
984  stat.add_coordinates(md_key, coord)
985  # add the counter to the output
986  stat.add_entry(md_key, name='counter')
987  return md_key
988 
989  def init_stats_add_hmc_category(self, stat, name='hmc', coord='protein'):
990  """shortcut for a frequent series of operations on HMC simulations'
991  statistics. Adds acceptance, number of MD steps and a trajectory for
992  a protein.
993  """
994  # create category
995  hmc_key = stat.add_category(name=name)
996  # giving None as argument is a way to create a static entry.
997  stat.add_entry(hmc_key, entry=Entry('temperature', '%10G', None))
998  stat.add_entry(hmc_key, entry=Entry('acceptance', '%10G', None))
999  stat.add_entry(hmc_key, entry=Entry('n_md_steps', '%10G', None))
1000  stat.add_entry(hmc_key, entry=Entry('E_kinetic', '%10G', None))
1001  # special call to add coordinates to be dumped
1002  stat.add_coordinates(hmc_key, coord)
1003  # add the counter to the output
1004  stat.add_entry(hmc_key, name='counter')
1005  return hmc_key
1006 
1007  def rescale_velocities(self, particles, factor):
1008  """rescale the velocities of a bunch of particles having vx vy and vz
1009  floatkeys
1010  """
1011  keys = [IMP.FloatKey("vx"), IMP.FloatKey("vy"), IMP.FloatKey("vz")]
1012  for p in particles:
1013  for k in keys:
1014  p.set_value(k, p.get_value(k) * factor)
Applies a SingletonScore to each Singleton in a list.
def rescale_velocities
rescale the velocities of a bunch of particles having vx vy and vz floatkeys
CHARMMParameters * get_heavy_atom_CHARMM_parameters()
A Monte Carlo optimizer.
Definition: MonteCarlo.h:45
def init_model_ambiguous_NOE_restraint
Reads an ambiguous NOE restraint.
Maintains temperature during molecular dynamics.
Enforce CHARMM stereochemistry on the given Hierarchy.
Atoms get_phi_dihedral_atoms(Residue rd)
Return the atoms comprising the phi dihedral.
def do_md_protein_statistics
updates statistics for md simulation: target temp, kinetic energy, kinetic temperature, writes coordinates and increments counter.
def init_model_charmm_protein_and_ff
creates a CHARMM protein representation.
def find_atom
scans the prot hierarchy and tries to find atom = (resno, name) assumes that resno follows the same n...
def init_model_NOEs
read TBL file and store NOE restraints, using one sigma and one gamma for the whole dataset...
Various classes to hold sets of particles.
def init_simulation_setup_nuisance_mc
sets up monte carlo on nuisance, at a certain target temperature, optionnally using a certain set of ...
A decorator for a particle which has bonds.
def init_simulation_setup_protein_hmc_nve
setup hybrid monte-carlo on protein.
Calculate score based on fit to SAXS profile.
A simple steepest descent optimizer.
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
def init_model_TALOS
read TALOS dihedral angle data, and create restraints for phi/psi torsion angles, along with the prio...
Ambiguous NOE distance restraint between a number of pairs of particles.
The type of an atom.
def get_pdb
returns a string corresponding to the pdb structure of hierarchy prot.
Hierarchy get_residue(Hierarchy mhd, unsigned int index)
Get the residue with the specified index.
def init_model_base
moves to wd and creates model
def init_model_NOEs_marginal
read TBL file and store NOE restraints, using the marginal of the lognormal with one sigma and one ga...
Return all close unordered pairs of particles taken from the SingletonContainer.
Classes to handle ISD statistics files.
Definition: Entry.py:1
def init_model_NOE_restraint
Sets up a lognormal distance restraint using the given sigma and gamma.
void read_pdb(TextInput input, int model, Hierarchy h)
CHARMM force field parameters.
Object used to hold a set of restraints.
Definition: RestraintSet.h:36
Bond create_custom_bond(Bonded a, Bonded b, Float length, Float stiffness=-1)
Connect the two wrapped particles by a custom bond.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
Removes rigid translation and rotation from the particles.
def init_model_vonMises_restraint_full
Sets up a vonMises torsion angle restraint using the given kappa particle as concentration parameter...
Store a list of ParticleIndexPairs.
Conjugate prior for the concentration parameter of a von Mises distribution.
Maintains temperature during molecular dynamics by velocity scaling.
ParticleIndexPairs get_indexes(const ParticlePairsTemp &ps)
def init_model_jeffreys_kappa
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of vonMi...
Simple molecular dynamics optimizer.
Store a list of ParticleIndexes.
Atoms get_psi_dihedral_atoms(Residue rd)
Return the atoms comprising the psi dihedral.
def init_stats_add_md_category
shortcut for a frequent series of operations on MD simulations' statistics.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
Gather all the molecular particles of a certain level in the hierarchy.
def init_model_HBonds_marginal
read TBL file and store lognormal restraints, using the marginal of the lognormal with one sigma and ...
Apply an NOE distance restraint between two particles.
Definition: NOERestraint.h:23
Base class for all optimizers.
Definition: Optimizer.h:46
static Bonded setup_particle(Model *m, ParticleIndex pi)
def init_stats_add_mc_category
shortcut for a frequent series of operations on MC simulations' statistics.
def init_simulation_setup_protein_hmc_hopper
setup hybrid monte-carlo on protein.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
static Scale setup_particle(Model *m, ParticleIndex pi)
Definition: Scale.h:30
phi/psi dihedral restraint between four particles, using data from TALOS.
Score the bond based on a UnaryFunction,.
def read_sequence_file
read sequence of ONE chain, 1-letter or 3-letter, returns dict of no:3-letter code.
Definition: utils.py:322
reads a TALOS file, or a TALOS folder, and stores the data
Definition: TALOSReader.py:14
Modify a set of continuous variables using a normal distribution.
Definition: NormalMover.h:20
nonspecific methods used across all shared function objects.
void set_log_level(LogLevel l)
Set the current global log level.
A decorator for a residue.
Definition: Residue.h:134
Apply a lognormal distance restraint between two particles.
Basic functionality that is expected to be used by a wide variety of IMP users.
def init_model_standard_SAXS_restraint
read experimental SAXS profile and apply restraint the standard way (like foxs) Returns: a restraints...
def init_model_setup_scale
sets up a Scale particle to the initial default value.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def init_model_jeffreys
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of Jeffr...
A filter that excludes bonds, angles and dihedrals.
Class to handle individual model particles.
Definition: Particle.h:37
def init_model_vonMises_restraint_mean
Sets up a vonMises torsion angle restraint using the given kappa particle as concentration parameter...
def init_stats_add_hmc_category
shortcut for a frequent series of operations on HMC simulations' statistics.
def m
wrapper to call methods of m
Apply an NOE distance restraint between two particles.
Functionality for loading, creating, manipulating and scoring atomic structures.
CHARMMParameters * get_all_atom_CHARMM_parameters()
Miscellaneous utilities.
Definition: utils.py:1
Hierarchies get_leaves(const Selection &h)
Classes to handle TALOS files or folders.
Definition: TALOSReader.py:1
Select hierarchy particles identified by the biological name.
Definition: Selection.h:65
Applies a PairScore to each Pair in a list.
Support for small angle X-ray scattering (SAXS) data.
void set_check_level(CheckLevel tf)
Control runtime checks in the code.
Definition: exception.h:73
def init_model_conjugate_kappa
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of vonMi...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
Classes to handle TBL files.
Definition: TBLReader.py:1
Harmonic function (symmetric about the mean)
Definition: core/Harmonic.h:24
Maintains temperature during molecular dynamics.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27