IMP logo
IMP Reference Guide  develop.63b38c487d,2024/12/21
The Integrative Modeling Platform
shared_functions.py
1 #!/usr/bin/env python
2 import sys
3 import os
4 from glob import glob
5 
6 import IMP
7 import IMP.atom
8 import IMP.container
9 import IMP.saxs
10 import IMP.core
11 import IMP.isd
12 import IMP.isd.TBLReader
14 import IMP.isd.utils
15 from IMP.isd.Entry import Entry
16 
17 from math import exp
18 from io import StringIO
19 
20 from random import random
21 
22 kB = (1.381 * 6.02214) / 4184.0
23 
24 
25 class PyMDMover:
26 
27  def __init__(self, cont, md, n_md_steps):
28  self.cont = cont
29  self.md = md
30  self.n_md_steps = n_md_steps
31  self.velkeys = [IMP.FloatKey("vx"),
32  IMP.FloatKey("vy"), IMP.FloatKey("vz")]
33 
34  def store_move(self):
35  self.oldcoords = []
36  # self.oldvel=[]
37  for i in range(self.cont.get_number_of_particles()):
38  p = self.cont.get_particle(i)
39  d = IMP.core.XYZ(p)
40  self.oldcoords.append(d.get_coordinates())
41 
42  def propose_move(self, prob):
43  self.md.optimize(self.n_md_steps)
44 
45  def reset_move(self):
46  for i in range(self.cont.get_number_of_particles()):
47  p = self.cont.get_particle(i)
48  # for j,k in enumerate(self.velkeys):
49  # p.set_value(k, self.oldvel[i][j])
50  d = IMP.core.XYZ(p)
51  d.set_coordinates(self.oldcoords[i])
52 
53  def get_number_of_steps(self):
54  return self.n_md_steps
55 
56  def set_number_of_steps(self, nsteps):
57  self.n_md_steps = nsteps
58 
59 
60 class PyMC(IMP.Optimizer):
61  debug = True
62 
63  def __init__(self, model):
64  self.m = model
65 
66  def add_mover(self, mv):
67  self.mv = mv
68 
69  def set_kt(self, kT):
70  self.kT = kT
71 
72  def set_return_best(self, thing):
73  pass
74 
75  def set_move_probability(self, thing):
76  pass
77 
78  def get_energy(self):
79  pot = self.m.evaluate(False)
80  # pot=self.get_restraints().evaluate(False)
81  kin = self.mv.md.get_kinetic_energy()
82  return pot + kin
83 
84  def metropolis(self, old, new):
85  deltaE = new - old
86  if self.debug:
87  print(": old %f new %f deltaE %f new_epot: %f"
88  % (old, new, deltaE, self.m.evaluate(False)), end=' ')
89  kT = self.kT
90  if deltaE < 0:
91  return True
92  else:
93  return exp(-deltaE / kT) > random()
94 
95  def optimize(self, nsteps):
96  self.naccept = 0
97  print("=== new MC call")
98  # store initial coordinates
99  self.mv.store_move()
100  for i in range(nsteps):
101  print("MC step %d " % i, end=' ')
102  # draw new velocities
103  self.mv.md.assign_velocities(self.kT / kB)
104  # get total energy
105  old = self.get_energy()
106  # make a MD move
107  self.mv.propose_move(1)
108  # get new total energy
109  new = self.get_energy()
110  if self.metropolis(old, new):
111  # move was accepted: store new conformation
112  self.mv.store_move()
113  self.naccept += 1
114  print("accepted ")
115  else:
116  # move rejected: restore old conformation
117  self.mv.reset_move()
118  print(" ")
119 
120  def get_number_of_forward_steps(self):
121  return self.naccept
122 
123 
125 
126  """nonspecific methods used across all shared function objects.
127  Rules:
128  - Their name starts with the name of the parent function (e.g.
129  init_model_* )
130  - they don't store anything in the class, but instead
131  return all created objects.
132  Exceptions: the model, which is self._m
133  the statistics class, which is self.stat
134  - they store what they have to store in the model (e.g. restraints)
135  - they don't print out anything except for long routines (e.g. NOE
136  parsing)
137  - the prior RestraintSet is added to the model when it is created, so
138  that when it is passed to another function, it is not added twice.
139  """
140 
141  def hello(self):
142  return "hello world"
143 
144  def set_checklevel(self, value):
145  IMP.set_check_level(value)
146 
147  def set_loglevel(self, value):
148  IMP.set_log_level(value)
149 
150  def m(self, name, *args, **kw):
151  "wrapper to call methods of m"
152  func = getattr(self._m, name)
153  return func(*args, **kw)
154 
155  def init_model_base(self, wd):
156  "moves to wd and creates model"
157  os.chdir(wd)
158  # Create an IMP model
159  self._m = IMP.Model()
160 
162  self, initpdb, top, par, selector, pairscore,
163  ff_temp=300.0, disulfides=None, representation='custom'):
164  """creates a CHARMM protein representation.
165  creates the charmm force field, bonded and nonbonded.
166  - initpdb: initial structure in pdb format
167  - top is a CHARMM top.lib, read if representation=='custom' (default)
168  - par is a CHARMM par.lib
169  - selector is an instance of
170  one of the selectors of IMP.atom, for example
171  IMP.atom.NonWaterNonHydrogenPDBSelector().
172  - pairscore is an instance of a Pair Score to score the interaction
173  between two atoms. usually, it's either
174  LennardJonesDistancePairScore(0,1) or
175  RepulsiveDistancePairScore(0,1)
176  - ff_temp is the temperature at which the force field should be
177  simulated.
178  - disulfides: if not None, a list of tuples corresponding to residue
179  numbers that should be cross-linked. Residues should be
180  cysteines, and residue numbering should start at 0.
181  - representation: 'full' : all-atom CHARMM force field
182  'heavy': heavy atom CHARMM forcefield with polar H
183  'calpha': only C alphas, ball and stick model with
184  bondlength 3.78 angstrom, beads at VdW
185  contact, and harmonic restraint between
186  them.
187  'custom' (default) : read given CHARMM top and par
188  files.
189  Returns: prot, ff, rsb, rs
190  - prot: the protein
191  - ff: the force field
192  - rsb: the RestraintSet on bonded interactions
193  - rs: the RestraintSet on nonbonded interactions. Both are weighted
194  using ff_temp.
195  """
196  m = self._m
197  prot = IMP.atom.read_pdb(initpdb, m, selector)
198  if not prot.get_is_valid(True):
199  raise ValueError("invalid hierarchy!")
200  if representation == 'custom':
201  # Read in the CHARMM heavy atom topology and parameter files
202  ff = IMP.atom.CHARMMParameters(top, par)
203  elif representation == 'heavy':
205  elif representation == 'full':
207  elif representation == 'calpha':
208  pass
209  else:
210  raise NotImplementedError(representation)
211  if representation == 'calpha':
212  print("setting up simplified C alpha force field")
213  # set radii
214  for ca in IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE):
215  IMP.core.XYZR(ca.get_particle()).set_radius(1.89)
216  # IMP.atom.Mass(ca.get_particle()).set_mass(1.)
217  # create bonds by getting pairs and applying a pairscore
218  pairs = []
219  for chain in prot.get_children():
220  residues = [(chain.get_child(i), chain.get_child(i + 1))
221  for i in range(chain.get_number_of_children() - 1)]
222  residues = [(i.get_child(0).get_particle(),
223  j.get_child(0).get_particle())
224  for (i, j) in residues]
225  pairs.extend(residues)
226  bonds = []
227  for (i, j) in pairs:
228  # check, because it fails if you try twice
230  bi = IMP.atom.Bonded(i)
231  else:
234  bi = IMP.atom.Bonded(i)
235  else:
237  bond = IMP.atom.create_custom_bond(bi, bj, 3.78, 10) # stiff
238  bonds.append(bond)
239  bonds_container = IMP.container.ListSingletonContainer(
240  self._m, IMP.get_indexes(bonds))
241  hdps = IMP.core.Harmonic(0, 1)
242  bs = IMP.atom.BondSingletonScore(hdps)
243  br = IMP.container.SingletonsRestraint(bs, bonds_container)
244  rsb = IMP.RestraintSet("bonded")
245  rsb.add_restraint(br)
246  rsb.set_weight(1.0 / (kB * ff_temp))
247  nonbonded_pair_filter = IMP.atom.StereochemistryPairFilter()
248  nonbonded_pair_filter.set_bonds(bonds)
249  ff = None
250  else:
251  print("setting up CHARMM forcefield")
252  #
253  # Using the CHARMM libraries, determine the ideal topology
254  # (atoms and their connectivity) for the PDB file's primary
255  # sequence
256  topology = ff.create_topology(prot)
257  # Typically this modifies the C and N termini of each chain in
258  # the protein by applying the CHARMM CTER and NTER patches.
259  # Patches can also be manually applied at this point, e.g. to add
260  # disulfide bridges.
261  topology.apply_default_patches()
262  # disulfides
263  if disulfides:
264  s = topology.get_segment(0)
265  dis = ff.get_patch('DISU')
266  for (i, j) in disulfides:
267  self.find_atom((i, 'SG'), prot)
268  self.find_atom((j, 'SG'), prot)
269  r0 = s.get_residue(i)
270  r1 = s.get_residue(j)
271  if i == 0:
272  r0.set_patched(False)
273  if j == 0:
274  r1.set_patched(False)
275  dis.apply(r0, r1)
276  print("added disulfide bridge between cysteines %d and %d"
277  % (i, j))
278  # Make the PDB file conform with the topology; i.e. if it contains
279  # extra atoms that are not in the CHARMM topology file, remove
280  # them; if it is missing atoms (e.g. sidechains, hydrogens) that
281  # are in the CHARMM topology, add them and construct their
282  # Cartesian coordinates from internal coordinate information.
283  topology.setup_hierarchy(prot)
284  # Set up and evaluate the stereochemical part (bonds, angles,
285  # dihedrals, impropers) of the CHARMM forcefield
286  r = IMP.atom.CHARMMStereochemistryRestraint(prot, topology)
287  rsb = IMP.RestraintSet(m, 1.0, "bonded")
288  rsb.add_restraint(r)
289  #
290  # Add non-bonded interaction (in this case, Lennard-Jones).
291  # This needs to know the radii and well depths for each atom,
292  # so add them from the forcefield (they can also be assigned
293  # manually using the XYZR or LennardJones decorators):
294  ff.add_radii(prot)
295  ff.add_well_depths(prot)
296  nonbonded_pair_filter = r.get_pair_filter()
297  # Get a list of all atoms in the protein, and put it in a container
298  atoms = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
300  IMP.get_indexes(atoms))
301  # Add a restraint for the Lennard-Jones interaction. This is built from
302  # a collection of building blocks. First, a ClosePairContainer
303  # maintains a list of all pairs of Particles that are close. Next, all
304  # 1-2, 1-3 and 1-4 pairs from the stereochemistry created above are
305  # filtered out. Then, a LennardJonesPairScore scores a pair of atoms
306  # with the Lennard-Jones potential. Finally, a PairsRestraint is used
307  # which simply applies the LennardJonesPairScore to each pair in the
308  # ClosePairContainer.
309  nbl = IMP.container.ClosePairContainer(cont, 4.0)
310  nbl.add_pair_filter(nonbonded_pair_filter)
311  # should weight the ff restraint by a temperature, set to 300K.
312  pr = IMP.container.PairsRestraint(pairscore, nbl)
313  rs = IMP.RestraintSet(m, 1.0 / (kB * ff_temp), 'phys')
314  rs.add_restraint(pr)
315  return prot, ff, rsb, rs
316 
317  def init_model_setup_scale(self, default, lower=None, upper=None):
318  """sets up a Scale particle to the initial default value. It can
319  optionally be constrained between two positive bounds, or else its
320  range is 0 to infinity.
321  """
322  m = self._m
323  scale = IMP.isd.Scale.setup_particle(IMP.Particle(m), default)
324  if lower:
325  scale.set_lower(lower)
326  if upper:
327  scale.set_upper(upper)
328  return scale
329 
330  def init_model_jeffreys_kappa(self, scales, prior_rs=None):
331  """given a list of scales, returns a RestraintSet('prior') with weight
332  1.0 that contains a list of vonMisesKappaJeffreysRestraint on each
333  scale. If argument prior_rs is used, add them to that RestraintSet
334  instead.
335  """
336  if not prior_rs:
337  prior_rs = IMP.RestraintSet('prior')
338  prior_rs.set_weight(1.0)
339  for i in scales:
340  prior_rs.add_restraint(IMP.isd.vonMisesKappaJeffreysRestraint(i))
341  return prior_rs
342 
343  def init_model_jeffreys(self, scales, prior_rs=None):
344  """given a list of scales, returns a RestraintSet('prior') with weight
345  1.0 that contains a list of JeffreysRestraint on each scale.
346  If argument prior_rs is used, add them to that RestraintSet instead.
347  """
348  if not prior_rs:
349  prior_rs = IMP.RestraintSet(self._m, 1.0, 'prior')
350  for i in scales:
351  jr = IMP.isd.JeffreysRestraint(self._m, i)
352  prior_rs.add_restraint(jr)
353  return prior_rs
354 
355  def init_model_conjugate_kappa(self, scales, c, R, prior_rs=None):
356  """given a list of scales, returns a RestraintSet('prior') with weight
357  1.0 that contains a list of vonMisesKappaConjugateRestraint on each
358  scale. If argument prior_rs is used, add them to that RestraintSet
359  instead.
360  """
361  if not (0 <= R <= c):
362  raise ValueError("parameters R and c should satisfy 0 <= R <= c")
363  if not prior_rs:
364  prior_rs = IMP.RestraintSet(self._m, 1.0, 'prior')
365  for i in scales:
366  prior_rs.add_restraint(
368  return prior_rs
369 
370  def find_atom(self, atom, prot):
371  """scans the prot hierarchy and tries to find atom = (resno, name)
372  assumes that resno follows the same numbering as the sequence.
373  Stores already found atoms for increased speed.
374  """
375  if not hasattr(self, '__memoized'):
376  self.__memoized = {prot: {}}
377  try:
378  return self.__memoized[prot][atom]
379  except KeyError:
380  pass
381  try:
382  sel = IMP.atom.Selection(hierarchy=prot,
383  residue_index=atom[0],
384  atom_type=IMP.atom.AtomType(atom[1])
385  ).get_selected_particles()
386  if len(sel) > 1:
387  print("found multiple atoms for atom %d %s!" % atom)
388  return
389  p0 = IMP.core.XYZ(sel[0])
390  except IndexError:
391  print("atom %d %s not found" % atom)
392  return
393  self.__memoized[prot][atom] = p0
394  return p0
395 
396  def init_model_vonMises_restraint_full(self, atoms, data, kappa):
397  """
398  Sets up a vonMises torsion angle restraint using the given kappa
399  particle as concentration parameter. Returns the restraint.
400  data is a list of observations.
401  """
402  return IMP.isd.TALOSRestraint(atoms, data, kappa)
403 
404  def init_model_vonMises_restraint_mean(self, prot, atoms, data, kappa):
405  """
406  Sets up a vonMises torsion angle restraint using the given kappa
407  particle as concentration parameter. Returns the restraint.
408  data is (mean, standard deviation).
409  """
410  raise NotImplementedError
411 
412  def init_model_NOE_restraint(self, prot, atoms, distance, sigma, gamma):
413  """
414  Sets up a lognormal distance restraint using the given sigma and gamma.
415  Returns the restraint.
416  assumes atoms = (atom1, atom2)
417  where atom1 is (resno, atomname) and resno is the residue sequence
418  number.
419  """
420  # find corresponding atoms
421  p0 = self.find_atom(atoms[0], prot)
422  p1 = self.find_atom(atoms[1], prot)
423  # create lognormal restraint using gamma_data = 1
425  self._m,
426  p0,
427  p1,
428  sigma,
429  gamma,
430  distance ** (-6))
431  return ln
432 
433  def init_model_ambiguous_NOE_restraint(self, prot, contributions, distance,
434  sigma, gamma):
435  """Reads an ambiguous NOE restraint. contributions is a list of
436  (atom1, atom2) pairs, where atom1 is (resno, atomname). Sets up a
437  lognormal distance restraint using the given sigma and gamma.
438  Returns the restraint.
439  """
440  # create pairs list
441  pairs = [(self.find_atom(i, prot).get_particle(),
442  self.find_atom(j, prot).get_particle()) for (i, j) in
443  contributions]
444  pairs = IMP.container.ListPairContainer(pairs)
445  # create restraint
447  pairs, sigma, gamma, distance ** (-6))
448  return ln
449 
451  self, prot, seqfile, tblfile, name='NOE', prior_rs=None,
452  bounds_sigma=(1.0, 0.1, 100), bounds_gamma=(1.0, 0.1, 100),
453  verbose=True, sequence_match=(1, 1)):
454  """read TBL file and store NOE restraints, using one sigma and one
455  gamma for the whole dataset. Creates the necessary uninformative
456  priors.
457  - prot: protein hierarchy
458  - seqfile: a file with 3-letter sequence
459  - tblfile: a TBL file with the restraints
460  - name: an optional name for the restraintset
461  - prior_rs: when not None, add new sigma and gamma to this
462  RestraintSet instance.
463  - bounds_sigma or gamma: tuple of (initial value, lower, upper bound)
464  bounds can be -1 to set to default range [0,+inf]
465  - verbose: be verbose (default True)
466  - sequence_match : (noe_start, sequence_start)
467  Returns: data_rs, prior_rs, sigma, gamma
468  """
469  # prior
470  if verbose:
471  print("Prior for the NOE Scales")
472  sigma = self.init_model_setup_scale(*bounds_sigma)
473  gamma = self.init_model_setup_scale(*bounds_gamma)
474  prior_rs = self.init_model_jeffreys([sigma, gamma], prior_rs)
475  # likelihood
476  rs = IMP.RestraintSet(self._m, 1.0, name)
477  # use the TBLReader to parse the TBL file.
479  seqfile, first_residue_number=sequence_match[1])
480  tblr = IMP.isd.TBLReader.TBLReader(sequence,
481  sequence_match=sequence_match)
482  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
483  for i, restraint in enumerate(restraints):
484  if verbose and i % 100 == 0:
485  print("\r%d" % i, end=' ')
486  sys.stdout.flush()
487  # a restraint is (contributions, dist, upper, lower, volume)
488  # where contributions is a tuple of contributing pairs
489  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
490  if len(restraint[0]) > 1:
492  prot, restraint[0],
493  restraint[1], sigma, gamma)
494  else:
495  ln = self.init_model_NOE_restraint(prot, restraint[0][0],
496  restraint[1], sigma, gamma)
497  rs.add_restraint(ln)
498  if verbose:
499  print("\r%d NOE restraints read" % i)
500  # set weight of rs.
501  # Weight is 1.0 cause sigma particle already has this role.
502  return rs, prior_rs, sigma, gamma
503 
504  def init_model_NOEs_marginal(self, prot, seqfile, tblfile, name='NOE',
505  verbose=True, sequence_match=(1, 1)):
506  """read TBL file and store NOE restraints, using the marginal of the
507  lognormal with one sigma and one gamma, for the whole dataset.
508  - prot: protein hierarchy
509  - seqfile: a file with 3-letter sequence
510  - tblfile: a TBL file with the restraints
511  - name: an optional name for the restraintset
512  - verbose: be verbose (default True)
513  - sequence_match : (noe_start, sequence_start)
514  Returns: data_rs
515  """
516  # likelihood
517  rs = IMP.RestraintSet(self._m, 1.0, name)
518  # use the TBLReader to parse the TBL file.
520  seqfile, first_residue_number=sequence_match[1])
521  tblr = IMP.isd.TBLReader.TBLReader(sequence,
522  sequence_match=sequence_match)
523  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
525  for i, restraint in enumerate(restraints):
526  if verbose and i % 100 == 0:
527  print("\r%d" % i, end=' ')
528  sys.stdout.flush()
529  # a restraint is (contributions, dist, upper, lower, volume)
530  # where contributions is a tuple of contributing pairs
531  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
532  # residue numbers start at 0
533  pairs = [(self.find_atom(i, prot).get_particle(),
534  self.find_atom(j, prot).get_particle()) for (i, j) in
535  restraint[0]]
536  pairs = IMP.container.ListPairContainer(pairs)
537  ln.add_contribution(pairs, restraint[4])
538  rs.add_restraint(ln)
539  if verbose:
540  print("\r%d NOE contributions added" % (len(restraints)))
541  return rs
542 
543  def init_model_HBonds_marginal(self, prot, seqfile, tblfile, name='NOE',
544  verbose=True):
545  """read TBL file and store lognormal restraints, using the marginal
546  of the lognormal with one sigma and gamma=1, for the whole dataset.
547  - prot: protein hierarchy
548  - seqfile: a file with 3-letter sequence
549  - tblfile: a TBL file with the restraints
550  - name: an optional name for the restraintset
551  - verbose: be verbose (default True)
552  Returns: data_rs
553  """
554  # likelihood
555  rs = IMP.RestraintSet(self._m, 1.0, name)
556  # use the TBLReader to parse the TBL file.
557  sequence = IMP.isd.utils.read_sequence_file(seqfile)
558  tblr = IMP.isd.TBLReader.TBLReader(sequence)
559  restraints = tblr.read_distances(tblfile, 'HBond')['HBond']
561  for i, restraint in enumerate(restraints):
562  if verbose and i % 100 == 0:
563  print("\r%d" % i, end=' ')
564  sys.stdout.flush()
565  # a restraint is (contributions, dist, upper, lower, volume)
566  # where contributions is a tuple of contributing pairs
567  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
568  # residue numbers start at 0
569  pairs = [(self.find_atom(i, prot).get_particle(),
570  self.find_atom(j, prot).get_particle()) for (i, j) in
571  restraint[0]]
572  pairs = IMP.container.ListPairContainer(pairs)
573  ln.add_contribution(pairs, restraint[4])
574  rs.add_restraint(ln)
575  if verbose:
576  print("\r%d Hbond contributions added" % (len(restraints)))
577  return rs
578 
579  def init_model_TALOS(self, prot, seqfile, talos_data, fulldata=True,
580  sequence_match=(1, 1), name='TALOS', prior_rs=None,
581  bounds_kappa=(1.0, 0.1, 10), verbose=True,
582  prior='jeffreys', keep_all=False):
583  """read TALOS dihedral angle data, and create restraints for phi/psi
584  torsion angles, along with the prior for kappa, which is a scale for
585  the whole dataset, compare to 1/sigma**2 in the NOE case.
586  - prot: protein hierarchy
587  - seqfile: a file with 3-letter sequence
588  - talos_data: either a file (pred.tab), or a folder (pred/) in which
589  all files in the form res???.tab can be found. If
590  possible, try to provide the folder, as statistics are
591  more accurate in that case.
592  - fulldata : either True or False, whether the data is the full TALOS
593  output (predAll.tab or pred/ folder), or just the averages
594  (pred.tab)
595  - sequence_match : (talos_no, sequence_no) to adjust for different
596  residue numberings
597  - name: an optional name for the restraintset
598  - prior_rs: when not None, add new kappa(s) to this RestraintSet
599  instance.
600  - bounds_kappa: tuple of (initial value, lower, upper bound)
601  bounds can be -1 to set to default range [0,+inf]
602  - verbose: be verbose (default True)
603  - prior: either 'jeffreys' or a tuple (R,c), which signifies to use the
604  conjugate prior of the von Mises restraint, with parameters R
605  and c. Good values are R=0 and c=10. Default: jeffreys prior.
606  - keep_all: in case of a folder for 'talos_data', whether to keep
607  candidates marked as 'outliers' by TALOS, or to include
608  them.
609  Returns: data_rs, prior_rs, kappa
610 
611  """
612  # prior
613  if verbose:
614  print("Prior for von Mises Kappa")
615  kappa = self.init_model_setup_scale(*bounds_kappa)
616  prior_rs = self.init_model_jeffreys_kappa([kappa], prior_rs)
617  # likelihood
618  if verbose:
619  print("reading data")
620  rs = IMP.RestraintSet(self._m, 1.0, name)
622  seqfile, first_residue_no=sequence_match[1])
623  if fulldata:
625  sequence, True, keep_all, sequence_match=sequence_match)
626  if os.path.isdir(talos_data):
627  # using pred/res???.tab files
628  for i, res in enumerate(glob(os.path.join(talos_data,
629  'res???.tab'))):
630  if verbose and i % 100:
631  print("\r%d" % i, end=' ')
632  sys.stdout.flush()
633  talosr.read(res)
634  else:
635  # using predAll.tab file
636  talosr.read(talos_data)
637  else:
638  # using pred.tab file and correcting for estimates
640  sequence, False, keep_all, sequence_match=sequence_match)
641  talosr.read(talos_data)
642  # get harvested data and create restraints
643  data = talosr.get_data()
644  if verbose:
645  print("\rread dihedral data for %d residues" % len(data))
646  print("creating restraints")
647  avgR = []
648  for resno, datum in data.items():
649  phidata = datum['phi']
650  psidata = datum['psi']
651  # num = datum['num']
652  res = IMP.atom.Residue(IMP.atom.get_residue(prot, resno))
655  if fulldata:
657  phi, phidata, kappa)
658  rs.add_restraint(r)
659  if verbose:
660  avgR.append(r.get_R0())
662  psi, psidata, kappa)
663  rs.add_restraint(r)
664  if verbose:
665  avgR.append(r.get_R0())
666  else:
668  phi, phidata, kappa)
669  rs.add_restraint(r)
670  if verbose:
671  avgR.append(r.get_R0())
673  psi, psidata, kappa)
674  rs.add_restraint(r)
675  if verbose:
676  avgR.append(r.get_R0())
677  if verbose:
678  print("%s Restraints created. Average R0: %f"
679  % (len(avgR), sum(avgR) / float(len(avgR))))
680  return rs, prior_rs, kappa
681 
683  self, prot, profilefile, name='SAXS',
684  ff_type=IMP.saxs.HEAVY_ATOMS):
685  """read experimental SAXS profile and apply restraint the standard
686  way (like foxs)
687  Returns: a restraintset and the experimental profile
688  """
689  rs = IMP.RestraintSet(self._m, 1.0, name)
690  saxs_profile = IMP.saxs.Profile(profilefile)
691  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
692  saxs_restraint = IMP.saxs.Restraint(particles, saxs_profile, ff_type)
693  rs.add_restraint(saxs_restraint)
694  return rs, saxs_profile
695 
696  def _setup_md(self, prot, temperature=300.0, thermostat='berendsen',
697  coupling=500, md_restraints=None, timestep=1.0,
698  recenter=1000, momentum=1000):
699  """setup molecular dynamics
700  - temperature: target temperature
701  - thermostat: one of 'NVE', rescale_velocities',
702  'berendsen', 'langevin'
703  - coupling: coupling constant (tau (fs) for berendsen,
704  gamma (/fs) for langevin)
705  - md_restraints: if not None, specify the terms of the energy to be
706  used during the md steps via a list of restraints.
707  - timestep: in femtoseconds.
708  - recenter: recenter the molecule every so many steps (Langevin only)
709  - momentum: remove angular momentum every so many steps
710  (Berendsen only)
711  Returns: an instance of md and an instance of an OptimizerState (the
712  thermostat), or None if NVE.
713  """
714  # Molecular Dynamics (from MAX BONOMI)
715  md = IMP.atom.MolecularDynamics(self._m)
716  md.assign_velocities(temperature)
717  md.set_time_step(timestep)
718  if thermostat == 'NVE':
719  os = None
720  elif thermostat == 'rescale_velocities':
722  IMP.atom.get_leaves(prot), temperature)
723  md.add_optimizer_state(os)
724  elif thermostat == 'berendsen':
726  IMP.atom.get_leaves(prot), temperature, coupling)
727  md.add_optimizer_state(os)
729  IMP.atom.get_leaves(prot), momentum)
730  md.add_optimizer_state(mom)
731  elif thermostat == 'langevin':
733  IMP.atom.get_leaves(prot), temperature, coupling)
734  md.add_optimizer_state(os)
735  # cen = IMP.atom.RemoveTranslationOptimizerState(
736  # IMP.atom.get_leaves(prot), recenter)
737  # md.add_optimizer_state(cen)
738  else:
739  raise NotImplementedError(thermostat)
740 
741  if md_restraints:
742  md.set_restraints(md_restraints)
743 
744  return md, os
745 
746  def _setup_normal_mover(self, particle, floatkey, stepsize):
747  """setup NormalMover to move particle's floatkey attribute
748  by a gaussian with standard deviation 'stepsize'
749  Returns: mover instance.
750  """
751  nm = IMP.core.NormalMover(self._m, particle.get_particle_index(),
752  IMP.FloatKeys([floatkey]), stepsize)
753  return nm
754 
755  def _setup_md_mover(self, md, particles, temperature, n_md_steps=10):
756  """setup MDMover using md and particles.
757  - md: md instance
758  - particles: particles to move, usually the leaves of the protein
759  hierarchy
760  - n_md_steps: number of md steps to perform on each move
761  Returns: mover instance.
762  """
764  cont.add(IMP.get_indexes(particles))
765  return IMP.atom.MDMover(cont, md, temperature, n_md_steps)
766 
767  def _setup_mc(self, mover, temperature=300.0, mc_restraints=None):
768  """setup monte carlo using a certain mover.
769  - mover: mover to use, NormalMover or MDMover usually.
770  - temperature: target temperature.
771  - mc_restraints: if not None, list of restraints for the metropolis
772  criterion.
773  Returns: mc instance.
774  """
775  mc = IMP.core.MonteCarlo(self._m)
776  # why is this returning an int?
777  mc.add_mover(mover)
778  # set same temp as MD, careful with units
779  mc.set_kt(kB * temperature)
780  # allow to go uphill
781  mc.set_return_best(False)
782  if mc_restraints:
783  # tell mc to only use restraints involving the particle
784  mc.set_restraints(mc_restraints)
785  return mc
786 
788  self, prot, temperature=300.0, gamma=0.01, n_md_steps=10,
789  md_restraints=None, mc_restraints=None, timestep=1.0,
790  sd_threshold=0.0, sd_stepsize=0.01, sd_maxsteps=100):
791  """setup hybrid monte-carlo on protein. Uses basin hopping with
792  steepest descent minimization.
793  - prot: protein hierarchy.
794  - temperature: target temperature.
795  - gamma: coupling constant for langevin (/fs)
796  - n_md_steps: number of md steps per mc step
797  - md_restraints: if not None, specify the terms of the energy to be
798  used during the md steps.
799  - mc_restraints: if not None, use these energy terms for the metropolis
800  criterion.
801  - timestep: time step for md, in femtoseconds.
802  - sd_threshold: stop steepest descent after energy difference drops
803  below that threshold
804  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
805  - sd_maxsteps: maximum number of steps for steepest descent
806  Returns: hmc, mdmover, md and OptimizerState (thermostat)
807  """
808  raise NotImplementedError
809  md, os = self._setup_md(prot, temperature=temperature,
810  thermostat='langevin', coupling=gamma,
811  md_restraints=md_restraints, timestep=timestep)
812  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
813  mdmover = self._setup_md_mover(md, particles, temperature, n_md_steps)
814  hmc = self._setup_mc(mdmover, temperature, mc_restraints)
815  sd = IMP.core.SteepestDescent(self._m)
816  sd.set_threshold(sd_threshold)
817  sd.set_step_size(sd_stepsize)
818  hmc.set_local_optimizer(sd)
819  hmc.set_local_steps(sd_maxsteps)
820  hmc.set_use_basin_hopping(True)
821  return hmc, mdmover, md, os
822 
824  self, prot, temperature=300.0, n_md_steps=100, md_restraints=None,
825  mc_restraints=None, timestep=1.0):
826  """setup hybrid monte-carlo on protein. Uses NVE MD and tries the full
827  - prot: protein hierarchy.
828  - temperature: target temperature.
829  - coupling: coupling constant (tau (fs) for berendsen,
830  gamma (/fs) for langevin)
831  - n_md_steps: number of md steps per mc step
832  - md_restraints: if not None, specify the terms of the energy to be
833  used during the md steps.
834  - mc_restraints: if not None, use these energy terms for the metropolis
835  criterion.
836  - timestep: time step for md, in femtoseconds.
837  - sd_threshold: stop steepest descent after energy difference drops
838  below that threshold
839  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
840  - sd_maxsteps: maximum number of steps for steepest descent
841  Returns: hmc, mdmover and md
842  """
843  prot = self._p['prot']
844  md, os = self._setup_md(prot, temperature=temperature,
845  thermostat='NVE', coupling=None, timestep=1.0,
846  md_restraints=md_restraints)
847  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
849  cont.add(IMP.get_indexes(particles))
850  mdmover = PyMDMover(cont, md, n_md_steps)
851  mdmover.m = self._m
852  mc = PyMC(self._m)
853  # why is this returning an int?
854  mc.add_mover(mdmover)
855  # set same temp as MD, careful with units
856  mc.set_kt(kB * temperature)
857  # allow to go uphill
858  mc.set_return_best(False)
859  # update all particles each time
860  mc.set_move_probability(1.0)
861  return mc, mdmover, md
862 
863  def init_simulation_setup_nuisance_mc(self, nuis, temperature=300.0,
864  mc_restraints=None, nm_stepsize=0.1):
865  """sets up monte carlo on nuisance, at a certain target temperature,
866  optionally using a certain set of restraints only.
867  - nuis: nuisance particle
868  - temperature: target temperature
869  - mc_restraints: optional set of restraints from which the energy
870  should be drawn instead of the energy of the complete system.
871  - floatkey: the floatkey to move.
872  - nm_stepsize: the stepsize of the normal mover
873  Returns: mc instance, nm instance.
874  """
875  nm = self._setup_normal_mover(nuis, nuis.get_nuisance_key(),
876  nm_stepsize)
877  mc = self._setup_mc(nm, temperature, mc_restraints)
878  return mc, nm
879 
880  def _mc_and_update_nm(self, nsteps, mc, nm, stats_key,
881  adjust_stepsize=True):
882  """run mc using a normal mover on a single particle,
883  update stepsize and store nsteps, acceptance and stepsize in the
884  statistics instance self.stat by using the key stats_key.
885  """
886  # do the monte carlo
887  mc.optimize(nsteps)
888  naccept = mc.get_number_of_forward_steps()
889  # increment the counter for this simulation
890  self.stat.increment_counter(stats_key, nsteps)
891  # get the acceptance rate, stepsize
892  accept = float(naccept) / nsteps
893  self.stat.update(stats_key, 'acceptance', accept)
894  stepsize = nm.get_sigma()
895  self.stat.update(stats_key, 'stepsize', stepsize)
896  # update stepsize if needed and requested
897  if adjust_stepsize:
898  if 0.4 < accept < 0.6:
899  return
900  if accept < 0.05:
901  accept = 0.05
902  if accept > 1.0:
903  accept = 1.0
904  nm.set_sigma(stepsize * 2 * accept)
905 
906  def _hmc_and_update_md(self, nsteps, hmc, mv, stats_key,
907  adjust_stepsize=True):
908  """run hmc, update stepsize and print statistics. Updates number of MD
909  steps to reach a target acceptance between 0.4 and 0.6, sends
910  statistics to self.stat. MD steps are always at least 10 and at
911  most 500.
912  """
913  hmc.optimize(nsteps)
914  naccept = hmc.get_number_of_forward_steps()
915  self.stat.increment_counter(stats_key, nsteps)
916  accept = float(naccept) / nsteps
917  self.stat.update(stats_key, 'acceptance', accept)
918  mdsteps = mv.get_number_of_steps()
919  self.stat.update(stats_key, 'n_md_steps', mdsteps)
920  if adjust_stepsize:
921  if 0.4 < accept < 0.6:
922  return
923  mdsteps = int(mdsteps * 2 ** (accept - 0.5))
924  if mdsteps > 500:
925  mdsteps = 500
926  if mdsteps < 10:
927  mdsteps = 10
928  mv.set_nsteps(mdsteps)
929 
930  def get_pdb(self, prot):
931  """returns a string corresponding to the pdb structure of hierarchy
932  prot.
933  """
934  output = StringIO()
935  IMP.atom.write_pdb(prot, output)
936  return output.getvalue()
937 
938  def get_netcdf(self, prot):
939  raise NotImplementedError
940 
941  def do_md_protein_statistics(self, md_key, nsteps, md_instance,
942  temperature=300.0, prot_coordinates=None):
943  """updates statistics for md simulation: target temp, kinetic energy,
944  kinetic temperature, writes coordinates and increments counter.
945  - md_key: stats md key
946  - nsteps: number of steps performed.
947  - md_instance: instance of the MolecularDynamics class.
948  - temperature: target temperature
949  - prot_coordinates: protein coordinates to be passed to the stats
950  class, (should be a string)
951  """
952  self.stat.update(md_key, 'target_temp', temperature)
953  kinetic = md_instance.get_kinetic_energy()
954  self.stat.update(md_key, 'E_kinetic', kinetic)
955  self.stat.update(md_key, 'instant_temp',
956  md_instance.get_kinetic_temperature(kinetic))
957  self.stat.update_coordinates(md_key, 'protein', prot_coordinates)
958  self.stat.increment_counter(md_key, nsteps)
959 
960  def init_stats_add_mc_category(self, stat, name='mc', coord='particle'):
961  """shortcut for a frequent series of operations on MC simulations'
962  statistics. Creates an entry for acceptance, stepsize and one
963  coordinate set printed in the statistics file.
964  """
965  # create category
966  mc_key = stat.add_category(name=name)
967  # giving None as argument is a way to create a static entry.
968  stat.add_entry(mc_key, entry=Entry('temperature', '%10G', None))
969  stat.add_entry(mc_key, entry=Entry('acceptance', '%10G', None))
970  stat.add_entry(mc_key, entry=Entry('stepsize', '%10G', None))
971  # special call to add coordinates to be dumped
972  stat.add_entry(mc_key, entry=Entry(coord, '%10G', None))
973  # add the counter to the output
974  stat.add_entry(mc_key, name='counter')
975  return mc_key
976 
977  def init_stats_add_md_category(self, stat, name='md', coord='protein'):
978  """shortcut for a frequent series of operations on MD simulations'
979  statistics. Creates an entry for target temp, instantaneous temp,
980  kinetic energy, and one set of coordinates called 'protein' by
981  default.
982  """
983  # create category
984  md_key = stat.add_category(name=name)
985  # giving None as argument is a way to create a static entry.
986  stat.add_entry(md_key, entry=Entry('target_temp', '%10G', None))
987  stat.add_entry(md_key, entry=Entry('instant_temp', '%10G', None))
988  stat.add_entry(md_key, entry=Entry('E_kinetic', '%10G', None))
989  # special call to add coordinates to be dumped
990  stat.add_coordinates(md_key, coord)
991  # add the counter to the output
992  stat.add_entry(md_key, name='counter')
993  return md_key
994 
995  def init_stats_add_hmc_category(self, stat, name='hmc', coord='protein'):
996  """shortcut for a frequent series of operations on HMC simulations'
997  statistics. Adds acceptance, number of MD steps and a trajectory for
998  a protein.
999  """
1000  # create category
1001  hmc_key = stat.add_category(name=name)
1002  # giving None as argument is a way to create a static entry.
1003  stat.add_entry(hmc_key, entry=Entry('temperature', '%10G', None))
1004  stat.add_entry(hmc_key, entry=Entry('acceptance', '%10G', None))
1005  stat.add_entry(hmc_key, entry=Entry('n_md_steps', '%10G', None))
1006  stat.add_entry(hmc_key, entry=Entry('E_kinetic', '%10G', None))
1007  # special call to add coordinates to be dumped
1008  stat.add_coordinates(hmc_key, coord)
1009  # add the counter to the output
1010  stat.add_entry(hmc_key, name='counter')
1011  return hmc_key
1012 
1013  def rescale_velocities(self, particles, factor):
1014  """rescale the velocities of a bunch of particles having vx vy and vz
1015  floatkeys
1016  """
1017  keys = [IMP.FloatKey("vx"), IMP.FloatKey("vy"), IMP.FloatKey("vz")]
1018  for p in particles:
1019  for k in keys:
1020  p.set_value(k, p.get_value(k) * factor)
Applies a SingletonScore to each Singleton in a list.
def rescale_velocities
rescale the velocities of a bunch of particles having vx vy and vz floatkeys
CHARMMParameters * get_heavy_atom_CHARMM_parameters()
A Monte Carlo optimizer.
Definition: MonteCarlo.h:44
def init_model_ambiguous_NOE_restraint
Reads an ambiguous NOE restraint.
Maintains temperature during molecular dynamics.
Enforce CHARMM stereochemistry on the given Hierarchy.
Atoms get_phi_dihedral_atoms(Residue rd)
Return the atoms comprising the phi dihedral.
def do_md_protein_statistics
updates statistics for md simulation: target temp, kinetic energy, kinetic temperature, writes coordinates and increments counter.
def init_model_charmm_protein_and_ff
creates a CHARMM protein representation.
def find_atom
scans the prot hierarchy and tries to find atom = (resno, name) assumes that resno follows the same n...
def init_model_NOEs
read TBL file and store NOE restraints, using one sigma and one gamma for the whole dataset...
Various classes to hold sets of particles.
def init_simulation_setup_nuisance_mc
sets up monte carlo on nuisance, at a certain target temperature, optionally using a certain set of r...
A decorator for a particle which has bonds.
def init_simulation_setup_protein_hmc_nve
setup hybrid monte-carlo on protein.
Calculate score based on fit to SAXS profile.
A simple steepest descent optimizer.
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
def init_model_TALOS
read TALOS dihedral angle data, and create restraints for phi/psi torsion angles, along with the prio...
Ambiguous NOE distance restraint between a number of pairs of particles.
The type of an atom.
def get_pdb
returns a string corresponding to the pdb structure of hierarchy prot.
def init_model_base
moves to wd and creates model
def init_model_NOEs_marginal
read TBL file and store NOE restraints, using the marginal of the lognormal with one sigma and one ga...
Return all close unordered pairs of particles taken from the SingletonContainer.
Classes to handle ISD statistics files.
Definition: Entry.py:1
def init_model_NOE_restraint
Sets up a lognormal distance restraint using the given sigma and gamma.
void read_pdb(TextInput input, int model, Hierarchy h)
CHARMM force field parameters.
Object used to hold a set of restraints.
Definition: RestraintSet.h:41
Bond create_custom_bond(Bonded a, Bonded b, Float length, Float stiffness=-1)
Connect the two wrapped particles by a custom bond.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Removes rigid translation and rotation from the particles.
def init_model_vonMises_restraint_full
Sets up a vonMises torsion angle restraint using the given kappa particle as concentration parameter...
Store a list of ParticleIndexPairs.
Conjugate prior for the concentration parameter of a von Mises distribution.
Maintains temperature during molecular dynamics by velocity scaling.
ParticleIndexPairs get_indexes(const ParticlePairsTemp &ps)
Get the indexes from a list of particle pairs.
def init_model_jeffreys_kappa
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of vonMi...
Simple molecular dynamics simulator.
Store a list of ParticleIndexes.
Atoms get_psi_dihedral_atoms(Residue rd)
Return the atoms comprising the psi dihedral.
def init_stats_add_md_category
shortcut for a frequent series of operations on MD simulations' statistics.
def init_model_HBonds_marginal
read TBL file and store lognormal restraints, using the marginal of the lognormal with one sigma and ...
Apply an NOE distance restraint between two particles.
Definition: NOERestraint.h:23
Base class for all optimizers.
Definition: Optimizer.h:48
static Bonded setup_particle(Model *m, ParticleIndex pi)
Jeffreys prior for the concentration parameter of a von Mises distribution.
def init_stats_add_mc_category
shortcut for a frequent series of operations on MC simulations' statistics.
def init_simulation_setup_protein_hmc_hopper
setup hybrid monte-carlo on protein.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
static Scale setup_particle(Model *m, ParticleIndex pi)
Definition: Scale.h:30
phi/psi dihedral restraint between four particles, using data from TALOS.
Score the bond based on a UnaryFunction,.
def read_sequence_file
read sequence of ONE chain, 1-letter or 3-letter, returns dict of no:3-letter code.
Definition: utils.py:275
reads a TALOS file, or a TALOS folder, and stores the data
Definition: TALOSReader.py:12
Modify a set of continuous variables using a normal distribution.
Definition: NormalMover.h:23
nonspecific methods used across all shared function objects.
void set_log_level(LogLevel l)
Set the current global log level.
A decorator for a residue.
Definition: Residue.h:137
Apply a lognormal distance restraint between two particles.
Basic functionality that is expected to be used by a wide variety of IMP users.
def init_model_standard_SAXS_restraint
read experimental SAXS profile and apply restraint the standard way (like foxs) Returns: a restraints...
def init_model_setup_scale
sets up a Scale particle to the initial default value.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def init_model_jeffreys
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of Jeffr...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
A filter that excludes bonds, angles and dihedrals.
Class to handle individual particles of a Model object.
Definition: Particle.h:43
def init_model_vonMises_restraint_mean
Sets up a vonMises torsion angle restraint using the given kappa particle as concentration parameter...
def init_stats_add_hmc_category
shortcut for a frequent series of operations on HMC simulations' statistics.
def m
wrapper to call methods of m
Apply an NOE distance restraint between two particles.
Functionality for loading, creating, manipulating and scoring atomic structures.
CHARMMParameters * get_all_atom_CHARMM_parameters()
Miscellaneous utilities.
Definition: utils.py:1
Hierarchies get_leaves(const Selection &h)
Classes to handle TALOS files or folders.
Definition: TALOSReader.py:1
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Applies a PairScore to each Pair in a list.
Support for small angle X-ray scattering (SAXS) data.
void set_check_level(CheckLevel tf)
Control runtime checks in the code.
Definition: exception.h:72
def init_model_conjugate_kappa
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of vonMi...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
Classes to handle TBL files.
Definition: TBLReader.py:1
Harmonic function (symmetric about the mean)
Definition: core/Harmonic.h:27
Maintains temperature during molecular dynamics.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27