IMP logo
IMP Reference Guide  develop.234970c887,2024/04/29
The Integrative Modeling Platform
shared_functions.py
1 #!/usr/bin/env python
2 from __future__ import print_function
3 import sys
4 import os
5 from glob import glob
6 
7 import IMP
8 import IMP.atom
9 import IMP.container
10 import IMP.saxs
11 import IMP.core
12 import IMP.isd
13 import IMP.isd.TBLReader
15 import IMP.isd.utils
16 from IMP.isd.Entry import Entry
17 
18 from math import exp
19 from io import StringIO
20 
21 from random import random
22 
23 kB = (1.381 * 6.02214) / 4184.0
24 
25 
26 class PyMDMover:
27 
28  def __init__(self, cont, md, n_md_steps):
29  self.cont = cont
30  self.md = md
31  self.n_md_steps = n_md_steps
32  self.velkeys = [IMP.FloatKey("vx"),
33  IMP.FloatKey("vy"), IMP.FloatKey("vz")]
34 
35  def store_move(self):
36  self.oldcoords = []
37  # self.oldvel=[]
38  for i in range(self.cont.get_number_of_particles()):
39  p = self.cont.get_particle(i)
40  d = IMP.core.XYZ(p)
41  self.oldcoords.append(d.get_coordinates())
42 
43  def propose_move(self, prob):
44  self.md.optimize(self.n_md_steps)
45 
46  def reset_move(self):
47  for i in range(self.cont.get_number_of_particles()):
48  p = self.cont.get_particle(i)
49  # for j,k in enumerate(self.velkeys):
50  # p.set_value(k, self.oldvel[i][j])
51  d = IMP.core.XYZ(p)
52  d.set_coordinates(self.oldcoords[i])
53 
54  def get_number_of_steps(self):
55  return self.n_md_steps
56 
57  def set_number_of_steps(self, nsteps):
58  self.n_md_steps = nsteps
59 
60 
61 class PyMC(IMP.Optimizer):
62  debug = True
63 
64  def __init__(self, model):
65  self.m = model
66 
67  def add_mover(self, mv):
68  self.mv = mv
69 
70  def set_kt(self, kT):
71  self.kT = kT
72 
73  def set_return_best(self, thing):
74  pass
75 
76  def set_move_probability(self, thing):
77  pass
78 
79  def get_energy(self):
80  pot = self.m.evaluate(False)
81  # pot=self.get_restraints().evaluate(False)
82  kin = self.mv.md.get_kinetic_energy()
83  return pot + kin
84 
85  def metropolis(self, old, new):
86  deltaE = new - old
87  if self.debug:
88  print(": old %f new %f deltaE %f new_epot: %f"
89  % (old, new, deltaE, self.m.evaluate(False)), end=' ')
90  kT = self.kT
91  if deltaE < 0:
92  return True
93  else:
94  return exp(-deltaE / kT) > random()
95 
96  def optimize(self, nsteps):
97  self.naccept = 0
98  print("=== new MC call")
99  # store initial coordinates
100  self.mv.store_move()
101  for i in range(nsteps):
102  print("MC step %d " % i, end=' ')
103  # draw new velocities
104  self.mv.md.assign_velocities(self.kT / kB)
105  # get total energy
106  old = self.get_energy()
107  # make a MD move
108  self.mv.propose_move(1)
109  # get new total energy
110  new = self.get_energy()
111  if self.metropolis(old, new):
112  # move was accepted: store new conformation
113  self.mv.store_move()
114  self.naccept += 1
115  print("accepted ")
116  else:
117  # move rejected: restore old conformation
118  self.mv.reset_move()
119  print(" ")
120 
121  def get_number_of_forward_steps(self):
122  return self.naccept
123 
124 
126 
127  """nonspecific methods used across all shared function objects.
128  Rules:
129  - Their name starts with the name of the parent function (e.g.
130  init_model_* )
131  - they don't store anything in the class, but instead
132  return all created objects.
133  Exceptions: the model, which is self._m
134  the statistics class, which is self.stat
135  - they store what they have to store in the model (e.g. restraints)
136  - they don't print out anything except for long routines (e.g. NOE
137  parsing)
138  - the prior RestraintSet is added to the model when it is created, so
139  that when it is passed to another function, it is not added twice.
140  """
141 
142  def hello(self):
143  return "hello world"
144 
145  def set_checklevel(self, value):
146  IMP.set_check_level(value)
147 
148  def set_loglevel(self, value):
149  IMP.set_log_level(value)
150 
151  def m(self, name, *args, **kw):
152  "wrapper to call methods of m"
153  func = getattr(self._m, name)
154  return func(*args, **kw)
155 
156  def init_model_base(self, wd):
157  "moves to wd and creates model"
158  os.chdir(wd)
159  # Create an IMP model
160  self._m = IMP.Model()
161 
163  self, initpdb, top, par, selector, pairscore,
164  ff_temp=300.0, disulfides=None, representation='custom'):
165  """creates a CHARMM protein representation.
166  creates the charmm force field, bonded and nonbonded.
167  - initpdb: initial structure in pdb format
168  - top is a CHARMM top.lib, read if representation=='custom' (default)
169  - par is a CHARMM par.lib
170  - selector is an instance of
171  one of the selectors of IMP.atom, for example
172  IMP.atom.NonWaterNonHydrogenPDBSelector().
173  - pairscore is an instance of a Pair Score to score the interaction
174  between two atoms. usually, it's either
175  LennardJonesDistancePairScore(0,1) or
176  RepulsiveDistancePairScore(0,1)
177  - ff_temp is the temperature at which the force field should be
178  simulated.
179  - disulfides: if not None, a list of tuples corresponding to residue
180  numbers that should be cross-linked. Residues should be
181  cysteines, and residue numbering should start at 0.
182  - representation: 'full' : all-atom CHARMM force field
183  'heavy': heavy atom CHARMM forcefield with polar H
184  'calpha': only C alphas, ball and stick model with
185  bondlength 3.78 angstrom, beads at VdW
186  contact, and harmonic restraint between
187  them.
188  'custom' (default) : read given CHARMM top and par
189  files.
190  Returns: prot, ff, rsb, rs
191  - prot: the protein
192  - ff: the force field
193  - rsb: the RestraintSet on bonded interactions
194  - rs: the RestraintSet on nonbonded interactions. Both are weighted
195  using ff_temp.
196  """
197  m = self._m
198  prot = IMP.atom.read_pdb(initpdb, m, selector)
199  if not prot.get_is_valid(True):
200  raise ValueError("invalid hierarchy!")
201  if representation == 'custom':
202  # Read in the CHARMM heavy atom topology and parameter files
203  ff = IMP.atom.CHARMMParameters(top, par)
204  elif representation == 'heavy':
206  elif representation == 'full':
208  elif representation == 'calpha':
209  pass
210  else:
211  raise NotImplementedError(representation)
212  if representation == 'calpha':
213  print("setting up simplified C alpha force field")
214  # set radii
215  for ca in IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE):
216  IMP.core.XYZR(ca.get_particle()).set_radius(1.89)
217  # IMP.atom.Mass(ca.get_particle()).set_mass(1.)
218  # create bonds by getting pairs and applying a pairscore
219  pairs = []
220  for chain in prot.get_children():
221  residues = [(chain.get_child(i), chain.get_child(i + 1))
222  for i in range(chain.get_number_of_children() - 1)]
223  residues = [(i.get_child(0).get_particle(),
224  j.get_child(0).get_particle())
225  for (i, j) in residues]
226  pairs.extend(residues)
227  bonds = []
228  for (i, j) in pairs:
229  # check, because it fails if you try twice
231  bi = IMP.atom.Bonded(i)
232  else:
235  bi = IMP.atom.Bonded(i)
236  else:
238  bond = IMP.atom.create_custom_bond(bi, bj, 3.78, 10) # stiff
239  bonds.append(bond)
240  bonds_container = IMP.container.ListSingletonContainer(
241  self._m, IMP.get_indexes(bonds))
242  hdps = IMP.core.Harmonic(0, 1)
243  bs = IMP.atom.BondSingletonScore(hdps)
244  br = IMP.container.SingletonsRestraint(bs, bonds_container)
245  rsb = IMP.RestraintSet("bonded")
246  rsb.add_restraint(br)
247  rsb.set_weight(1.0 / (kB * ff_temp))
248  nonbonded_pair_filter = IMP.atom.StereochemistryPairFilter()
249  nonbonded_pair_filter.set_bonds(bonds)
250  ff = None
251  else:
252  print("setting up CHARMM forcefield")
253  #
254  # Using the CHARMM libraries, determine the ideal topology
255  # (atoms and their connectivity) for the PDB file's primary
256  # sequence
257  topology = ff.create_topology(prot)
258  # Typically this modifies the C and N termini of each chain in
259  # the protein by applying the CHARMM CTER and NTER patches.
260  # Patches can also be manually applied at this point, e.g. to add
261  # disulfide bridges.
262  topology.apply_default_patches()
263  # disulfides
264  if disulfides:
265  s = topology.get_segment(0)
266  dis = ff.get_patch('DISU')
267  for (i, j) in disulfides:
268  self.find_atom((i, 'SG'), prot)
269  self.find_atom((j, 'SG'), prot)
270  r0 = s.get_residue(i)
271  r1 = s.get_residue(j)
272  if i == 0:
273  r0.set_patched(False)
274  if j == 0:
275  r1.set_patched(False)
276  dis.apply(r0, r1)
277  print("added disulfide bridge between cysteines %d and %d"
278  % (i, j))
279  # Make the PDB file conform with the topology; i.e. if it contains
280  # extra atoms that are not in the CHARMM topology file, remove
281  # them; if it is missing atoms (e.g. sidechains, hydrogens) that
282  # are in the CHARMM topology, add them and construct their
283  # Cartesian coordinates from internal coordinate information.
284  topology.setup_hierarchy(prot)
285  # Set up and evaluate the stereochemical part (bonds, angles,
286  # dihedrals, impropers) of the CHARMM forcefield
287  r = IMP.atom.CHARMMStereochemistryRestraint(prot, topology)
288  rsb = IMP.RestraintSet(m, 1.0, "bonded")
289  rsb.add_restraint(r)
290  #
291  # Add non-bonded interaction (in this case, Lennard-Jones).
292  # This needs to know the radii and well depths for each atom,
293  # so add them from the forcefield (they can also be assigned
294  # manually using the XYZR or LennardJones decorators):
295  ff.add_radii(prot)
296  ff.add_well_depths(prot)
297  nonbonded_pair_filter = r.get_pair_filter()
298  # Get a list of all atoms in the protein, and put it in a container
299  atoms = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
301  IMP.get_indexes(atoms))
302  # Add a restraint for the Lennard-Jones interaction. This is built from
303  # a collection of building blocks. First, a ClosePairContainer
304  # maintains a list of all pairs of Particles that are close. Next, all
305  # 1-2, 1-3 and 1-4 pairs from the stereochemistry created above are
306  # filtered out. Then, a LennardJonesPairScore scores a pair of atoms
307  # with the Lennard-Jones potential. Finally, a PairsRestraint is used
308  # which simply applies the LennardJonesPairScore to each pair in the
309  # ClosePairContainer.
310  nbl = IMP.container.ClosePairContainer(cont, 4.0)
311  nbl.add_pair_filter(nonbonded_pair_filter)
312  # should weight the ff restraint by a temperature, set to 300K.
313  pr = IMP.container.PairsRestraint(pairscore, nbl)
314  rs = IMP.RestraintSet(m, 1.0 / (kB * ff_temp), 'phys')
315  rs.add_restraint(pr)
316  return prot, ff, rsb, rs
317 
318  def init_model_setup_scale(self, default, lower=None, upper=None):
319  """sets up a Scale particle to the initial default value. It can
320  optionally be constrained between two positive bounds, or else its
321  range is 0 to infinity.
322  """
323  m = self._m
324  scale = IMP.isd.Scale.setup_particle(IMP.Particle(m), default)
325  if lower:
326  scale.set_lower(lower)
327  if upper:
328  scale.set_upper(upper)
329  return scale
330 
331  def init_model_jeffreys_kappa(self, scales, prior_rs=None):
332  """given a list of scales, returns a RestraintSet('prior') with weight
333  1.0 that contains a list of vonMisesKappaJeffreysRestraint on each
334  scale. If argument prior_rs is used, add them to that RestraintSet
335  instead.
336  """
337  if not prior_rs:
338  prior_rs = IMP.RestraintSet('prior')
339  prior_rs.set_weight(1.0)
340  for i in scales:
341  prior_rs.add_restraint(IMP.isd.vonMisesKappaJeffreysRestraint(i))
342  return prior_rs
343 
344  def init_model_jeffreys(self, scales, prior_rs=None):
345  """given a list of scales, returns a RestraintSet('prior') with weight
346  1.0 that contains a list of JeffreysRestraint on each scale.
347  If argument prior_rs is used, add them to that RestraintSet instead.
348  """
349  if not prior_rs:
350  prior_rs = IMP.RestraintSet(self._m, 1.0, 'prior')
351  for i in scales:
352  jr = IMP.isd.JeffreysRestraint(self._m, i)
353  prior_rs.add_restraint(jr)
354  return prior_rs
355 
356  def init_model_conjugate_kappa(self, scales, c, R, prior_rs=None):
357  """given a list of scales, returns a RestraintSet('prior') with weight
358  1.0 that contains a list of vonMisesKappaConjugateRestraint on each
359  scale. If argument prior_rs is used, add them to that RestraintSet
360  instead.
361  """
362  if not (0 <= R <= c):
363  raise ValueError("parameters R and c should satisfy 0 <= R <= c")
364  if not prior_rs:
365  prior_rs = IMP.RestraintSet(self._m, 1.0, 'prior')
366  for i in scales:
367  prior_rs.add_restraint(
369  return prior_rs
370 
371  def find_atom(self, atom, prot):
372  """scans the prot hierarchy and tries to find atom = (resno, name)
373  assumes that resno follows the same numbering as the sequence.
374  Stores already found atoms for increased speed.
375  """
376  if not hasattr(self, '__memoized'):
377  self.__memoized = {prot: {}}
378  try:
379  return self.__memoized[prot][atom]
380  except KeyError:
381  pass
382  try:
383  sel = IMP.atom.Selection(hierarchy=prot,
384  residue_index=atom[0],
385  atom_type=IMP.atom.AtomType(atom[1])
386  ).get_selected_particles()
387  if len(sel) > 1:
388  print("found multiple atoms for atom %d %s!" % atom)
389  return
390  p0 = IMP.core.XYZ(sel[0])
391  except IndexError:
392  print("atom %d %s not found" % atom)
393  return
394  self.__memoized[prot][atom] = p0
395  return p0
396 
397  def init_model_vonMises_restraint_full(self, atoms, data, kappa):
398  """
399  Sets up a vonMises torsion angle restraint using the given kappa
400  particle as concentration parameter. Returns the restraint.
401  data is a list of observations.
402  """
403  return IMP.isd.TALOSRestraint(atoms, data, kappa)
404 
405  def init_model_vonMises_restraint_mean(self, prot, atoms, data, kappa):
406  """
407  Sets up a vonMises torsion angle restraint using the given kappa
408  particle as concentration parameter. Returns the restraint.
409  data is (mean, standard deviation).
410  """
411  raise NotImplementedError
412 
413  def init_model_NOE_restraint(self, prot, atoms, distance, sigma, gamma):
414  """
415  Sets up a lognormal distance restraint using the given sigma and gamma.
416  Returns the restraint.
417  assumes atoms = (atom1, atom2)
418  where atom1 is (resno, atomname) and resno is the residue sequence
419  number.
420  """
421  # find corresponding atoms
422  p0 = self.find_atom(atoms[0], prot)
423  p1 = self.find_atom(atoms[1], prot)
424  # create lognormal restraint using gamma_data = 1
426  self._m,
427  p0,
428  p1,
429  sigma,
430  gamma,
431  distance ** (-6))
432  return ln
433 
434  def init_model_ambiguous_NOE_restraint(self, prot, contributions, distance,
435  sigma, gamma):
436  """Reads an ambiguous NOE restraint. contributions is a list of
437  (atom1, atom2) pairs, where atom1 is (resno, atomname). Sets up a
438  lognormal distance restraint using the given sigma and gamma.
439  Returns the restraint.
440  """
441  # create pairs list
442  pairs = [(self.find_atom(i, prot).get_particle(),
443  self.find_atom(j, prot).get_particle()) for (i, j) in
444  contributions]
445  pairs = IMP.container.ListPairContainer(pairs)
446  # create restraint
448  pairs, sigma, gamma, distance ** (-6))
449  return ln
450 
452  self, prot, seqfile, tblfile, name='NOE', prior_rs=None,
453  bounds_sigma=(1.0, 0.1, 100), bounds_gamma=(1.0, 0.1, 100),
454  verbose=True, sequence_match=(1, 1)):
455  """read TBL file and store NOE restraints, using one sigma and one
456  gamma for the whole dataset. Creates the necessary uninformative
457  priors.
458  - prot: protein hierarchy
459  - seqfile: a file with 3-letter sequence
460  - tblfile: a TBL file with the restraints
461  - name: an optional name for the restraintset
462  - prior_rs: when not None, add new sigma and gamma to this
463  RestraintSet instance.
464  - bounds_sigma or gamma: tuple of (initial value, lower, upper bound)
465  bounds can be -1 to set to default range [0,+inf]
466  - verbose: be verbose (default True)
467  - sequence_match : (noe_start, sequence_start)
468  Returns: data_rs, prior_rs, sigma, gamma
469  """
470  # prior
471  if verbose:
472  print("Prior for the NOE Scales")
473  sigma = self.init_model_setup_scale(*bounds_sigma)
474  gamma = self.init_model_setup_scale(*bounds_gamma)
475  prior_rs = self.init_model_jeffreys([sigma, gamma], prior_rs)
476  # likelihood
477  rs = IMP.RestraintSet(self._m, 1.0, name)
478  # use the TBLReader to parse the TBL file.
479  sequence = IMP.isd.utils.read_sequence_file(
480  seqfile, first_residue_number=sequence_match[1])
481  tblr = IMP.isd.TBLReader.TBLReader(sequence,
482  sequence_match=sequence_match)
483  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
484  for i, restraint in enumerate(restraints):
485  if verbose and i % 100 == 0:
486  print("\r%d" % i, end=' ')
487  sys.stdout.flush()
488  # a restraint is (contributions, dist, upper, lower, volume)
489  # where contributions is a tuple of contributing pairs
490  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
491  if len(restraint[0]) > 1:
493  prot, restraint[0],
494  restraint[1], sigma, gamma)
495  else:
496  ln = self.init_model_NOE_restraint(prot, restraint[0][0],
497  restraint[1], sigma, gamma)
498  rs.add_restraint(ln)
499  if verbose:
500  print("\r%d NOE restraints read" % i)
501  # set weight of rs.
502  # Weight is 1.0 cause sigma particle already has this role.
503  return rs, prior_rs, sigma, gamma
504 
505  def init_model_NOEs_marginal(self, prot, seqfile, tblfile, name='NOE',
506  verbose=True, sequence_match=(1, 1)):
507  """read TBL file and store NOE restraints, using the marginal of the
508  lognormal with one sigma and one gamma, for the whole dataset.
509  - prot: protein hierarchy
510  - seqfile: a file with 3-letter sequence
511  - tblfile: a TBL file with the restraints
512  - name: an optional name for the restraintset
513  - verbose: be verbose (default True)
514  - sequence_match : (noe_start, sequence_start)
515  Returns: data_rs
516  """
517  # likelihood
518  rs = IMP.RestraintSet(self._m, 1.0, name)
519  # use the TBLReader to parse the TBL file.
520  sequence = IMP.isd.utils.read_sequence_file(
521  seqfile, first_residue_number=sequence_match[1])
522  tblr = IMP.isd.TBLReader.TBLReader(sequence,
523  sequence_match=sequence_match)
524  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
526  for i, restraint in enumerate(restraints):
527  if verbose and i % 100 == 0:
528  print("\r%d" % i, end=' ')
529  sys.stdout.flush()
530  # a restraint is (contributions, dist, upper, lower, volume)
531  # where contributions is a tuple of contributing pairs
532  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
533  # residue numbers start at 0
534  pairs = [(self.find_atom(i, prot).get_particle(),
535  self.find_atom(j, prot).get_particle()) for (i, j) in
536  restraint[0]]
537  pairs = IMP.container.ListPairContainer(pairs)
538  ln.add_contribution(pairs, restraint[4])
539  rs.add_restraint(ln)
540  if verbose:
541  print("\r%d NOE contributions added" % (len(restraints)))
542  return rs
543 
544  def init_model_HBonds_marginal(self, prot, seqfile, tblfile, name='NOE',
545  verbose=True):
546  """read TBL file and store lognormal restraints, using the marginal
547  of the lognormal with one sigma and gamma=1, for the whole dataset.
548  - prot: protein hierarchy
549  - seqfile: a file with 3-letter sequence
550  - tblfile: a TBL file with the restraints
551  - name: an optional name for the restraintset
552  - verbose: be verbose (default True)
553  Returns: data_rs
554  """
555  # likelihood
556  rs = IMP.RestraintSet(self._m, 1.0, name)
557  # use the TBLReader to parse the TBL file.
558  sequence = IMP.isd.utils.read_sequence_file(seqfile)
559  tblr = IMP.isd.TBLReader.TBLReader(sequence)
560  restraints = tblr.read_distances(tblfile, 'HBond')['HBond']
562  for i, restraint in enumerate(restraints):
563  if verbose and i % 100 == 0:
564  print("\r%d" % i, end=' ')
565  sys.stdout.flush()
566  # a restraint is (contributions, dist, upper, lower, volume)
567  # where contributions is a tuple of contributing pairs
568  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
569  # residue numbers start at 0
570  pairs = [(self.find_atom(i, prot).get_particle(),
571  self.find_atom(j, prot).get_particle()) for (i, j) in
572  restraint[0]]
573  pairs = IMP.container.ListPairContainer(pairs)
574  ln.add_contribution(pairs, restraint[4])
575  rs.add_restraint(ln)
576  if verbose:
577  print("\r%d Hbond contributions added" % (len(restraints)))
578  return rs
579 
580  def init_model_TALOS(self, prot, seqfile, talos_data, fulldata=True,
581  sequence_match=(1, 1), name='TALOS', prior_rs=None,
582  bounds_kappa=(1.0, 0.1, 10), verbose=True,
583  prior='jeffreys', keep_all=False):
584  """read TALOS dihedral angle data, and create restraints for phi/psi
585  torsion angles, along with the prior for kappa, which is a scale for
586  the whole dataset, compare to 1/sigma**2 in the NOE case.
587  - prot: protein hierarchy
588  - seqfile: a file with 3-letter sequence
589  - talos_data: either a file (pred.tab), or a folder (pred/) in which
590  all files in the form res???.tab can be found. If
591  possible, try to provide the folder, as statistics are
592  more accurate in that case.
593  - fulldata : either True or False, whether the data is the full TALOS
594  output (predAll.tab or pred/ folder), or just the averages
595  (pred.tab)
596  - sequence_match : (talos_no, sequence_no) to adjust for different
597  residue numberings
598  - name: an optional name for the restraintset
599  - prior_rs: when not None, add new kappa(s) to this RestraintSet
600  instance.
601  - bounds_kappa: tuple of (initial value, lower, upper bound)
602  bounds can be -1 to set to default range [0,+inf]
603  - verbose: be verbose (default True)
604  - prior: either 'jeffreys' or a tuple (R,c), which signifies to use the
605  conjugate prior of the von Mises restraint, with parameters R
606  and c. Good values are R=0 and c=10. Default: jeffreys prior.
607  - keep_all: in case of a folder for 'talos_data', whether to keep
608  candidates marked as 'outliers' by TALOS, or to include
609  them.
610  Returns: data_rs, prior_rs, kappa
611 
612  """
613  # prior
614  if verbose:
615  print("Prior for von Mises Kappa")
616  kappa = self.init_model_setup_scale(*bounds_kappa)
617  prior_rs = self.init_model_jeffreys_kappa([kappa], prior_rs)
618  # likelihood
619  if verbose:
620  print("reading data")
621  rs = IMP.RestraintSet(self._m, 1.0, name)
622  sequence = IMP.isd.utils.read_sequence_file(
623  seqfile, first_residue_no=sequence_match[1])
624  if fulldata:
625  talosr = IMP.isd.TALOSReader.TALOSReader(
626  sequence, True, keep_all, sequence_match=sequence_match)
627  if os.path.isdir(talos_data):
628  # using pred/res???.tab files
629  for i, res in enumerate(glob(os.path.join(talos_data,
630  'res???.tab'))):
631  if verbose and i % 100:
632  print("\r%d" % i, end=' ')
633  sys.stdout.flush()
634  talosr.read(res)
635  else:
636  # using predAll.tab file
637  talosr.read(talos_data)
638  else:
639  # using pred.tab file and correcting for estimates
640  talosr = IMP.isd.TALOSReader.TALOSReader(
641  sequence, False, keep_all, sequence_match=sequence_match)
642  talosr.read(talos_data)
643  # get harvested data and create restraints
644  data = talosr.get_data()
645  if verbose:
646  print("\rread dihedral data for %d residues" % len(data))
647  print("creating restraints")
648  avgR = []
649  for resno, datum in data.items():
650  phidata = datum['phi']
651  psidata = datum['psi']
652  # num = datum['num']
653  res = IMP.atom.Residue(IMP.atom.get_residue(prot, resno))
656  if fulldata:
658  phi, phidata, kappa)
659  rs.add_restraint(r)
660  if verbose:
661  avgR.append(r.get_R0())
663  psi, psidata, kappa)
664  rs.add_restraint(r)
665  if verbose:
666  avgR.append(r.get_R0())
667  else:
669  phi, phidata, kappa)
670  rs.add_restraint(r)
671  if verbose:
672  avgR.append(r.get_R0())
674  psi, psidata, kappa)
675  rs.add_restraint(r)
676  if verbose:
677  avgR.append(r.get_R0())
678  if verbose:
679  print("%s Restraints created. Average R0: %f"
680  % (len(avgR), sum(avgR) / float(len(avgR))))
681  return rs, prior_rs, kappa
682 
684  self, prot, profilefile, name='SAXS',
685  ff_type=IMP.saxs.HEAVY_ATOMS):
686  """read experimental SAXS profile and apply restraint the standard
687  way (like foxs)
688  Returns: a restraintset and the experimental profile
689  """
690  rs = IMP.RestraintSet(self._m, 1.0, name)
691  saxs_profile = IMP.saxs.Profile(profilefile)
692  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
693  saxs_restraint = IMP.saxs.Restraint(particles, saxs_profile, ff_type)
694  rs.add_restraint(saxs_restraint)
695  return rs, saxs_profile
696 
697  def _setup_md(self, prot, temperature=300.0, thermostat='berendsen',
698  coupling=500, md_restraints=None, timestep=1.0,
699  recenter=1000, momentum=1000):
700  """setup molecular dynamics
701  - temperature: target temperature
702  - thermostat: one of 'NVE', rescale_velocities',
703  'berendsen', 'langevin'
704  - coupling: coupling constant (tau (fs) for berendsen,
705  gamma (/fs) for langevin)
706  - md_restraints: if not None, specify the terms of the energy to be
707  used during the md steps via a list of restraints.
708  - timestep: in femtoseconds.
709  - recenter: recenter the molecule every so many steps (Langevin only)
710  - momentum: remove angular momentum every so many steps
711  (Berendsen only)
712  Returns: an instance of md and an instance of an OptimizerState (the
713  thermostat), or None if NVE.
714  """
715  # Molecular Dynamics (from MAX BONOMI)
716  md = IMP.atom.MolecularDynamics(self._m)
717  md.assign_velocities(temperature)
718  md.set_time_step(timestep)
719  if thermostat == 'NVE':
720  os = None
721  elif thermostat == 'rescale_velocities':
723  IMP.atom.get_leaves(prot), temperature)
724  md.add_optimizer_state(os)
725  elif thermostat == 'berendsen':
727  IMP.atom.get_leaves(prot), temperature, coupling)
728  md.add_optimizer_state(os)
730  IMP.atom.get_leaves(prot), momentum)
731  md.add_optimizer_state(mom)
732  elif thermostat == 'langevin':
734  IMP.atom.get_leaves(prot), temperature, coupling)
735  md.add_optimizer_state(os)
736  # cen = IMP.atom.RemoveTranslationOptimizerState(
737  # IMP.atom.get_leaves(prot), recenter)
738  # md.add_optimizer_state(cen)
739  else:
740  raise NotImplementedError(thermostat)
741 
742  if md_restraints:
743  md.set_restraints(md_restraints)
744 
745  return md, os
746 
747  def _setup_normal_mover(self, particle, floatkey, stepsize):
748  """setup NormalMover to move particle's floatkey attribute
749  by a gaussian with standard deviation 'stepsize'
750  Returns: mover instance.
751  """
752  nm = IMP.core.NormalMover(self._m, particle.get_particle_index(),
753  IMP.FloatKeys([floatkey]), stepsize)
754  return nm
755 
756  def _setup_md_mover(self, md, particles, temperature, n_md_steps=10):
757  """setup MDMover using md and particles.
758  - md: md instance
759  - particles: particles to move, usually the leaves of the protein
760  hierarchy
761  - n_md_steps: number of md steps to perform on each move
762  Returns: mover instance.
763  """
765  cont.add(IMP.get_indexes(particles))
766  return IMP.atom.MDMover(cont, md, temperature, n_md_steps)
767 
768  def _setup_mc(self, mover, temperature=300.0, mc_restraints=None):
769  """setup monte carlo using a certain mover.
770  - mover: mover to use, NormalMover or MDMover usually.
771  - temperature: target temperature.
772  - mc_restraints: if not None, list of restraints for the metropolis
773  criterion.
774  Returns: mc instance.
775  """
776  mc = IMP.core.MonteCarlo(self._m)
777  # why is this returning an int?
778  mc.add_mover(mover)
779  # set same temp as MD, careful with units
780  mc.set_kt(kB * temperature)
781  # allow to go uphill
782  mc.set_return_best(False)
783  if mc_restraints:
784  # tell mc to only use restraints involving the particle
785  mc.set_restraints(mc_restraints)
786  return mc
787 
789  self, prot, temperature=300.0, gamma=0.01, n_md_steps=10,
790  md_restraints=None, mc_restraints=None, timestep=1.0,
791  sd_threshold=0.0, sd_stepsize=0.01, sd_maxsteps=100):
792  """setup hybrid monte-carlo on protein. Uses basin hopping with
793  steepest descent minimization.
794  - prot: protein hierarchy.
795  - temperature: target temperature.
796  - gamma: coupling constant for langevin (/fs)
797  - n_md_steps: number of md steps per mc step
798  - md_restraints: if not None, specify the terms of the energy to be
799  used during the md steps.
800  - mc_restraints: if not None, use these energy terms for the metropolis
801  criterion.
802  - timestep: time step for md, in femtoseconds.
803  - sd_threshold: stop steepest descent after energy difference drops
804  below that threshold
805  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
806  - sd_maxsteps: maximum number of steps for steepest descent
807  Returns: hmc, mdmover, md and OptimizerState (thermostat)
808  """
809  raise NotImplementedError
810  md, os = self._setup_md(prot, temperature=temperature,
811  thermostat='langevin', coupling=gamma,
812  md_restraints=md_restraints, timestep=timestep)
813  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
814  mdmover = self._setup_md_mover(md, particles, temperature, n_md_steps)
815  hmc = self._setup_mc(mdmover, temperature, mc_restraints)
816  sd = IMP.core.SteepestDescent(self._m)
817  sd.set_threshold(sd_threshold)
818  sd.set_step_size(sd_stepsize)
819  hmc.set_local_optimizer(sd)
820  hmc.set_local_steps(sd_maxsteps)
821  hmc.set_use_basin_hopping(True)
822  return hmc, mdmover, md, os
823 
825  self, prot, temperature=300.0, n_md_steps=100, md_restraints=None,
826  mc_restraints=None, timestep=1.0):
827  """setup hybrid monte-carlo on protein. Uses NVE MD and tries the full
828  - prot: protein hierarchy.
829  - temperature: target temperature.
830  - coupling: coupling constant (tau (fs) for berendsen,
831  gamma (/fs) for langevin)
832  - n_md_steps: number of md steps per mc step
833  - md_restraints: if not None, specify the terms of the energy to be
834  used during the md steps.
835  - mc_restraints: if not None, use these energy terms for the metropolis
836  criterion.
837  - timestep: time step for md, in femtoseconds.
838  - sd_threshold: stop steepest descent after energy difference drops
839  below that threshold
840  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
841  - sd_maxsteps: maximum number of steps for steepest descent
842  Returns: hmc, mdmover and md
843  """
844  prot = self._p['prot']
845  md, os = self._setup_md(prot, temperature=temperature,
846  thermostat='NVE', coupling=None, timestep=1.0,
847  md_restraints=md_restraints)
848  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
850  cont.add(IMP.get_indexes(particles))
851  mdmover = PyMDMover(cont, md, n_md_steps)
852  mdmover.m = self._m
853  mc = PyMC(self._m)
854  # why is this returning an int?
855  mc.add_mover(mdmover)
856  # set same temp as MD, careful with units
857  mc.set_kt(kB * temperature)
858  # allow to go uphill
859  mc.set_return_best(False)
860  # update all particles each time
861  mc.set_move_probability(1.0)
862  return mc, mdmover, md
863 
864  def init_simulation_setup_nuisance_mc(self, nuis, temperature=300.0,
865  mc_restraints=None, nm_stepsize=0.1):
866  """sets up monte carlo on nuisance, at a certain target temperature,
867  optionally using a certain set of restraints only.
868  - nuis: nuisance particle
869  - temperature: target temperature
870  - mc_restraints: optional set of restraints from which the energy
871  should be drawn instead of the energy of the complete system.
872  - floatkey: the floatkey to move.
873  - nm_stepsize: the stepsize of the normal mover
874  Returns: mc instance, nm instance.
875  """
876  nm = self._setup_normal_mover(nuis, nuis.get_nuisance_key(),
877  nm_stepsize)
878  mc = self._setup_mc(nm, temperature, mc_restraints)
879  return mc, nm
880 
881  def _mc_and_update_nm(self, nsteps, mc, nm, stats_key,
882  adjust_stepsize=True):
883  """run mc using a normal mover on a single particle,
884  update stepsize and store nsteps, acceptance and stepsize in the
885  statistics instance self.stat by using the key stats_key.
886  """
887  # do the monte carlo
888  mc.optimize(nsteps)
889  naccept = mc.get_number_of_forward_steps()
890  # increment the counter for this simulation
891  self.stat.increment_counter(stats_key, nsteps)
892  # get the acceptance rate, stepsize
893  accept = float(naccept) / nsteps
894  self.stat.update(stats_key, 'acceptance', accept)
895  stepsize = nm.get_sigma()
896  self.stat.update(stats_key, 'stepsize', stepsize)
897  # update stepsize if needed and requested
898  if adjust_stepsize:
899  if 0.4 < accept < 0.6:
900  return
901  if accept < 0.05:
902  accept = 0.05
903  if accept > 1.0:
904  accept = 1.0
905  nm.set_sigma(stepsize * 2 * accept)
906 
907  def _hmc_and_update_md(self, nsteps, hmc, mv, stats_key,
908  adjust_stepsize=True):
909  """run hmc, update stepsize and print statistics. Updates number of MD
910  steps to reach a target acceptance between 0.4 and 0.6, sends
911  statistics to self.stat. MD steps are always at least 10 and at
912  most 500.
913  """
914  hmc.optimize(nsteps)
915  naccept = hmc.get_number_of_forward_steps()
916  self.stat.increment_counter(stats_key, nsteps)
917  accept = float(naccept) / nsteps
918  self.stat.update(stats_key, 'acceptance', accept)
919  mdsteps = mv.get_number_of_steps()
920  self.stat.update(stats_key, 'n_md_steps', mdsteps)
921  if adjust_stepsize:
922  if 0.4 < accept < 0.6:
923  return
924  mdsteps = int(mdsteps * 2 ** (accept - 0.5))
925  if mdsteps > 500:
926  mdsteps = 500
927  if mdsteps < 10:
928  mdsteps = 10
929  mv.set_nsteps(mdsteps)
930 
931  def get_pdb(self, prot):
932  """returns a string corresponding to the pdb structure of hierarchy
933  prot.
934  """
935  output = StringIO()
936  IMP.atom.write_pdb(prot, output)
937  return output.getvalue()
938 
939  def get_netcdf(self, prot):
940  raise NotImplementedError
941 
942  def do_md_protein_statistics(self, md_key, nsteps, md_instance,
943  temperature=300.0, prot_coordinates=None):
944  """updates statistics for md simulation: target temp, kinetic energy,
945  kinetic temperature, writes coordinates and increments counter.
946  - md_key: stats md key
947  - nsteps: number of steps performed.
948  - md_instance: instance of the MolecularDynamics class.
949  - temperature: target temperature
950  - prot_coordinates: protein coordinates to be passed to the stats
951  class, (should be a string)
952  """
953  self.stat.update(md_key, 'target_temp', temperature)
954  kinetic = md_instance.get_kinetic_energy()
955  self.stat.update(md_key, 'E_kinetic', kinetic)
956  self.stat.update(md_key, 'instant_temp',
957  md_instance.get_kinetic_temperature(kinetic))
958  self.stat.update_coordinates(md_key, 'protein', prot_coordinates)
959  self.stat.increment_counter(md_key, nsteps)
960 
961  def init_stats_add_mc_category(self, stat, name='mc', coord='particle'):
962  """shortcut for a frequent series of operations on MC simulations'
963  statistics. Creates an entry for acceptance, stepsize and one
964  coordinate set printed in the statistics file.
965  """
966  # create category
967  mc_key = stat.add_category(name=name)
968  # giving None as argument is a way to create a static entry.
969  stat.add_entry(mc_key, entry=Entry('temperature', '%10G', None))
970  stat.add_entry(mc_key, entry=Entry('acceptance', '%10G', None))
971  stat.add_entry(mc_key, entry=Entry('stepsize', '%10G', None))
972  # special call to add coordinates to be dumped
973  stat.add_entry(mc_key, entry=Entry(coord, '%10G', None))
974  # add the counter to the output
975  stat.add_entry(mc_key, name='counter')
976  return mc_key
977 
978  def init_stats_add_md_category(self, stat, name='md', coord='protein'):
979  """shortcut for a frequent series of operations on MD simulations'
980  statistics. Creates an entry for target temp, instantaneous temp,
981  kinetic energy, and one set of coordinates called 'protein' by
982  default.
983  """
984  # create category
985  md_key = stat.add_category(name=name)
986  # giving None as argument is a way to create a static entry.
987  stat.add_entry(md_key, entry=Entry('target_temp', '%10G', None))
988  stat.add_entry(md_key, entry=Entry('instant_temp', '%10G', None))
989  stat.add_entry(md_key, entry=Entry('E_kinetic', '%10G', None))
990  # special call to add coordinates to be dumped
991  stat.add_coordinates(md_key, coord)
992  # add the counter to the output
993  stat.add_entry(md_key, name='counter')
994  return md_key
995 
996  def init_stats_add_hmc_category(self, stat, name='hmc', coord='protein'):
997  """shortcut for a frequent series of operations on HMC simulations'
998  statistics. Adds acceptance, number of MD steps and a trajectory for
999  a protein.
1000  """
1001  # create category
1002  hmc_key = stat.add_category(name=name)
1003  # giving None as argument is a way to create a static entry.
1004  stat.add_entry(hmc_key, entry=Entry('temperature', '%10G', None))
1005  stat.add_entry(hmc_key, entry=Entry('acceptance', '%10G', None))
1006  stat.add_entry(hmc_key, entry=Entry('n_md_steps', '%10G', None))
1007  stat.add_entry(hmc_key, entry=Entry('E_kinetic', '%10G', None))
1008  # special call to add coordinates to be dumped
1009  stat.add_coordinates(hmc_key, coord)
1010  # add the counter to the output
1011  stat.add_entry(hmc_key, name='counter')
1012  return hmc_key
1013 
1014  def rescale_velocities(self, particles, factor):
1015  """rescale the velocities of a bunch of particles having vx vy and vz
1016  floatkeys
1017  """
1018  keys = [IMP.FloatKey("vx"), IMP.FloatKey("vy"), IMP.FloatKey("vz")]
1019  for p in particles:
1020  for k in keys:
1021  p.set_value(k, p.get_value(k) * factor)
Applies a SingletonScore to each Singleton in a list.
CHARMMParameters * get_heavy_atom_CHARMM_parameters()
def init_stats_add_md_category
shortcut for a frequent series of operations on MD simulations' statistics.
A Monte Carlo optimizer.
Definition: MonteCarlo.h:44
Maintains temperature during molecular dynamics.
Enforce CHARMM stereochemistry on the given Hierarchy.
Atoms get_phi_dihedral_atoms(Residue rd)
Return the atoms comprising the phi dihedral.
def init_model_NOE_restraint
Sets up a lognormal distance restraint using the given sigma and gamma.
def init_model_jeffreys
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of Jeffr...
Various classes to hold sets of particles.
A decorator for a particle which has bonds.
Calculate score based on fit to SAXS profile.
def init_model_NOEs_marginal
read TBL file and store NOE restraints, using the marginal of the lognormal with one sigma and one ga...
def init_model_setup_scale
sets up a Scale particle to the initial default value.
A simple steepest descent optimizer.
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
def init_model_conjugate_kappa
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of vonMi...
def init_model_ambiguous_NOE_restraint
Reads an ambiguous NOE restraint.
Definition: Entry.py:1
def find_atom
scans the prot hierarchy and tries to find atom = (resno, name) assumes that resno follows the same n...
Ambiguous NOE distance restraint between a number of pairs of particles.
def init_model_base
moves to wd and creates model
The type of an atom.
Return all close unordered pairs of particles taken from the SingletonContainer.
Classes to handle ISD statistics files.
def init_model_NOEs
read TBL file and store NOE restraints, using one sigma and one gamma for the whole dataset...
void read_pdb(TextInput input, int model, Hierarchy h)
def init_simulation_setup_nuisance_mc
sets up monte carlo on nuisance, at a certain target temperature, optionally using a certain set of r...
CHARMM force field parameters.
Object used to hold a set of restraints.
Definition: RestraintSet.h:41
Bond create_custom_bond(Bonded a, Bonded b, Float length, Float stiffness=-1)
Connect the two wrapped particles by a custom bond.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Removes rigid translation and rotation from the particles.
Store a list of ParticleIndexPairs.
def init_stats_add_hmc_category
shortcut for a frequent series of operations on HMC simulations' statistics.
Conjugate prior for the concentration parameter of a von Mises distribution.
Maintains temperature during molecular dynamics by velocity scaling.
def init_model_HBonds_marginal
read TBL file and store lognormal restraints, using the marginal of the lognormal with one sigma and ...
ParticleIndexPairs get_indexes(const ParticlePairsTemp &ps)
Get the indexes from a list of particle pairs.
Simple molecular dynamics simulator.
Store a list of ParticleIndexes.
def init_model_vonMises_restraint_mean
Sets up a vonMises torsion angle restraint using the given kappa particle as concentration parameter...
Atoms get_psi_dihedral_atoms(Residue rd)
Return the atoms comprising the psi dihedral.
Apply an NOE distance restraint between two particles.
Definition: NOERestraint.h:23
Base class for all optimizers.
Definition: Optimizer.h:48
static Bonded setup_particle(Model *m, ParticleIndex pi)
Jeffreys prior for the concentration parameter of a von Mises distribution.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
static Scale setup_particle(Model *m, ParticleIndex pi)
Definition: Scale.h:30
phi/psi dihedral restraint between four particles, using data from TALOS.
Score the bond based on a UnaryFunction,.
def get_pdb
returns a string corresponding to the pdb structure of hierarchy prot.
def init_model_jeffreys_kappa
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of vonMi...
Modify a set of continuous variables using a normal distribution.
Definition: NormalMover.h:23
def init_model_vonMises_restraint_full
Sets up a vonMises torsion angle restraint using the given kappa particle as concentration parameter...
void set_log_level(LogLevel l)
Set the current global log level.
def init_model_TALOS
read TALOS dihedral angle data, and create restraints for phi/psi torsion angles, along with the prio...
A decorator for a residue.
Definition: Residue.h:137
def do_md_protein_statistics
updates statistics for md simulation: target temp, kinetic energy, kinetic temperature, writes coordinates and increments counter.
Apply a lognormal distance restraint between two particles.
Basic functionality that is expected to be used by a wide variety of IMP users.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def m
wrapper to call methods of m
def rescale_velocities
rescale the velocities of a bunch of particles having vx vy and vz floatkeys
def init_simulation_setup_protein_hmc_hopper
setup hybrid monte-carlo on protein.
def init_model_standard_SAXS_restraint
read experimental SAXS profile and apply restraint the standard way (like foxs) Returns: a restraints...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
A filter that excludes bonds, angles and dihedrals.
Class to handle individual particles of a Model object.
Definition: Particle.h:43
def init_model_charmm_protein_and_ff
creates a CHARMM protein representation.
def init_stats_add_mc_category
shortcut for a frequent series of operations on MC simulations' statistics.
Apply an NOE distance restraint between two particles.
def init_simulation_setup_protein_hmc_nve
setup hybrid monte-carlo on protein.
nonspecific methods used across all shared function objects.
Functionality for loading, creating, manipulating and scoring atomic structures.
CHARMMParameters * get_all_atom_CHARMM_parameters()
Miscellaneous utilities.
Hierarchies get_leaves(const Selection &h)
Classes to handle TALOS files or folders.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Applies a PairScore to each Pair in a list.
Support for small angle X-ray scattering (SAXS) data.
void set_check_level(CheckLevel tf)
Control runtime checks in the code.
Definition: exception.h:72
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
Classes to handle TBL files.
Harmonic function (symmetric about the mean)
Definition: core/Harmonic.h:27
Maintains temperature during molecular dynamics.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27