IMP  2.4.0
The Integrative Modeling Platform
shared_functions.py
1 #!/usr/bin/env python
2 from __future__ import print_function
3 import sys
4 import os
5 from glob import glob
6 
7 import IMP
8 import IMP.atom
9 import IMP.container
10 import IMP.saxs
11 import IMP.core
12 import IMP.isd
13 import IMP.isd.TBLReader
15 import IMP.isd.utils
16 from IMP.isd.Entry import Entry
17 
18 from math import sqrt, exp
19 from io import StringIO
20 
21 from random import random
22 
23 kB = (1.381 * 6.02214) / 4184.0
24 
25 
26 class PyMDMover:
27 
28  def __init__(self, cont, md, n_md_steps):
29  self.cont = cont
30  self.md = md
31  self.n_md_steps = n_md_steps
32  self.velkeys = [IMP.FloatKey("vx"),
33  IMP.FloatKey("vy"), IMP.FloatKey("vz")]
34 
35  def store_move(self):
36  self.oldcoords = []
37  # self.oldvel=[]
38  for i in range(self.cont.get_number_of_particles()):
39  p = self.cont.get_particle(i)
40  #self.oldvel.append([p.get_value(k) for k in self.velkeys])
41  d = IMP.core.XYZ(p)
42  self.oldcoords.append(d.get_coordinates())
43 
44  def propose_move(self, prob):
45  self.md.optimize(self.n_md_steps)
46 
47  def reset_move(self):
48  for i in range(self.cont.get_number_of_particles()):
49  p = self.cont.get_particle(i)
50  # for j,k in enumerate(self.velkeys):
51  # p.set_value(k, self.oldvel[i][j])
52  d = IMP.core.XYZ(p)
53  d.set_coordinates(self.oldcoords[i])
54 
55  def get_number_of_steps(self):
56  return self.n_md_steps
57 
58  def set_number_of_steps(self, nsteps):
59  self.n_md_steps = nsteps
60 
61 
62 class PyMC(IMP.Optimizer):
63  debug = True
64 
65  def __init__(self, model):
66  self.m = model
67 
68  def add_mover(self, mv):
69  self.mv = mv
70 
71  def set_kt(self, kT):
72  self.kT = kT
73 
74  def set_return_best(self, thing):
75  pass
76 
77  def set_move_probability(self, thing):
78  pass
79 
80  def get_energy(self):
81  pot = self.m.evaluate(False)
82  # pot=self.get_restraints().evaluate(False)
83  kin = self.mv.md.get_kinetic_energy()
84  return pot + kin
85 
86  def metropolis(self, old, new):
87  deltaE = new - old
88  if self.debug:
89  print(": old %f new %f deltaE %f new_epot: %f" % (old, new, deltaE,
90  self.m.evaluate(
91  False)), end=' ')
92  kT = self.kT
93  if deltaE < 0:
94  return True
95  else:
96  return exp(-deltaE / kT) > random()
97 
98  def optimize(self, nsteps):
99  self.naccept = 0
100  print("=== new MC call")
101  # store initial coordinates
102  self.mv.store_move()
103  for i in range(nsteps):
104  print("MC step %d " % i, end=' ')
105  # draw new velocities
106  self.mv.md.assign_velocities(self.kT / kB)
107  # get total energy
108  old = self.get_energy()
109  # make a MD move
110  self.mv.propose_move(1)
111  # get new total energy
112  new = self.get_energy()
113  if self.metropolis(old, new):
114  # move was accepted: store new conformation
115  self.mv.store_move()
116  self.naccept += 1
117  print("accepted ")
118  else:
119  # move rejected: restore old conformation
120  self.mv.reset_move()
121  print(" ")
122 
123  def get_number_of_forward_steps(self):
124  return self.naccept
125 
126 
128 
129  """nonspecific methods used across all shared function objects.
130  Rules:
131  - Their name starts with the name of the parent function (e.g.
132  init_model_* )
133  - they don't store anything in the class, but instead
134  return all created objects.
135  Exceptions: the model, which is self._m
136  the statistics class, which is self.stat
137  - they store what they have to store in the model (e.g. restraints)
138  - they don't print out anything except for long routines (e.g. NOE
139  parsing)
140  - the prior RestraintSet is added to the model when it is created, so
141  that when it is passed to another function, it is not added twice.
142  """
143 
144  def hello(self):
145  return "hello world"
146 
147  def set_checklevel(self, value):
149 
150  def set_loglevel(self, value):
152 
153  def m(self, name, *args, **kw):
154  "wrapper to call methods of m"
155  func = getattr(self._m, name)
156  return func(*args, **kw)
157 
158  def init_model_base(self, wd):
159  "moves to wd and creates model"
160  os.chdir(wd)
161  # Create an IMP model
162  self._m = IMP.Model()
163 
165  self, initpdb, top, par, selector, pairscore,
166  ff_temp=300.0, disulfides=None, representation='custom'):
167  """creates a CHARMM protein representation.
168  creates the charmm force field, bonded and nonbonded.
169  - initpdb: initial structure in pdb format
170  - top is a CHARMM top.lib, read if representation=='custom' (default)
171  - par is a CHARMM par.lib
172  - selector is an instance of
173  one of the selectors of IMP.atom, for example
174  IMP.atom.NonWaterNonHydrogenPDBSelector().
175  - pairscore is an instance of a Pair Score to score the interaction
176  between two atoms. usually, it's either
177  LennardJonesDistancePairScore(0,1) or
178  RepulsiveDistancePairScore(0,1)
179  - ff_temp is the temperature at which the force field should be
180  simulated.
181  - disulfides: if not None, a list of tuples corresponding to residue
182  numbers that should be cross-linked. Residues should be
183  cysteines, and residue numbering should start at 0.
184  - representation: 'full' : all-atom CHARMM force field
185  'heavy': heavy atom CHARMM forcefield with polar H
186  'calpha': only C alphas, ball and stick model with
187  bondlength 3.78 angstrom, beads at VdW
188  contact, and harmonic restraint between
189  them.
190  'custom' (default) : read given CHARMM top and par
191  files.
192  Returns: prot, ff, rsb, rs
193  - prot: the protein
194  - ff: the force field
195  - rsb: the RestraintSet on bonded interactions
196  - rs: the RestraintSet on nonbonded interactions. Both are weighted
197  using ff_temp.
198  """
199  m = self._m
200  prot = IMP.atom.read_pdb(initpdb, m, selector)
201  if not prot.get_is_valid(True):
202  raise ValueError("invalid hierarchy!")
203  if representation == 'custom':
204  # Read in the CHARMM heavy atom topology and parameter files
205  ff = IMP.atom.CHARMMParameters(top, par)
206  elif representation == 'heavy':
208  elif representation == 'full':
210  elif representation == 'calpha':
211  pass
212  else:
213  raise NotImplementedError(representation)
214  if representation == 'calpha':
215  print("setting up simplified C alpha force field")
216  # set radii
217  for ca in IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE):
218  IMP.core.XYZR(ca.get_particle()).set_radius(1.89)
219  # IMP.atom.Mass(ca.get_particle()).set_mass(1.)
220  # create bonds by getting pairs and applying a pairscore
221  pairs = []
222  for chain in prot.get_children():
223  residues = [(chain.get_child(i), chain.get_child(i + 1))
224  for i in range(chain.get_number_of_children() - 1)]
225  residues = [(i.get_child(0).get_particle(),
226  j.get_child(0).get_particle())
227  for (i, j) in residues]
228  pairs.extend(residues)
229  bonds = []
230  for (i, j) in pairs:
231  # check, because it fails if you try twice
233  bi = IMP.atom.Bonded(i)
234  else:
237  bi = IMP.atom.Bonded(i)
238  else:
240  bond = IMP.atom.create_custom_bond(bi, bj, 3.78, 10) # stiff
241  bonds.append(bond)
242  bonds_container = IMP.container.ListSingletonContainer(bonds)
243  hdps = IMP.core.Harmonic(0, 1)
244  bs = IMP.atom.BondSingletonScore(hdps)
245  br = IMP.container.SingletonsRestraint(bs, bonds_container)
246  rsb = IMP.RestraintSet("bonded")
247  rsb.add_restraint(br)
248  rsb.set_weight(1.0 / (kB * ff_temp))
249  m.add_restraint(rsb)
250  nonbonded_pair_filter = IMP.atom.StereochemistryPairFilter()
251  nonbonded_pair_filter.set_bonds(bonds)
252  ff = None
253  else:
254  print("setting up CHARMM forcefield")
255  #
256  # Using the CHARMM libraries, determine the ideal topology (atoms and their
257  # connectivity) for the PDB file's primary sequence
258  topology = ff.create_topology(prot)
259  # Typically this modifies the C and N termini of each chain in the protein by
260  # applying the CHARMM CTER and NTER patches. Patches can also be manually
261  # applied at this point, e.g. to add disulfide bridges.
262  topology.apply_default_patches()
263  # disulfides
264  if disulfides:
265  s = topology.get_segment(0)
266  dis = ff.get_patch('DISU')
267  for (i, j) in disulfides:
268  self.find_atom((i, 'SG'), prot)
269  self.find_atom((j, 'SG'), prot)
270  r0 = s.get_residue(i)
271  r1 = s.get_residue(j)
272  if i == 0:
273  r0.set_patched(False)
274  if j == 0:
275  r1.set_patched(False)
276  dis.apply(r0, r1)
277  print("added disulfide bridge between cysteines %d and %d" % (i, j))
278  # Make the PDB file conform with the topology; i.e. if it contains extra
279  # atoms that are not in the CHARMM topology file, remove them; if it is
280  # missing atoms (e.g. sidechains, hydrogens) that are in the CHARMM topology,
281  # add them and construct their Cartesian coordinates from internal coordinate
282  # information.
283  topology.setup_hierarchy(prot)
284  # Set up and evaluate the stereochemical part (bonds, angles, dihedrals,
285  # impropers) of the CHARMM forcefield
286  r = IMP.atom.CHARMMStereochemistryRestraint(prot, topology)
287  rsb = IMP.RestraintSet(m, 1.0, "bonded")
288  rsb.add_restraint(r)
289  m.add_restraint(rsb)
290  #
291  # Add non-bonded interaction (in this case, Lennard-Jones). This needs to
292  # know the radii and well depths for each atom, so add them from the forcefield
293  # (they can also be assigned manually using the XYZR or LennardJones
294  # decorators):
295  ff.add_radii(prot)
296  ff.add_well_depths(prot)
297  nonbonded_pair_filter = r.get_pair_filter()
298  # Get a list of all atoms in the protein, and put it in a container
299  atoms = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
301  # Add a restraint for the Lennard-Jones interaction. This is built from
302  # a collection of building blocks. First, a ClosePairContainer maintains a list
303  # of all pairs of Particles that are close. Next, all 1-2, 1-3 and 1-4 pairs
304  # from the stereochemistry created above are filtered out.
305  # Then, a LennardJonesPairScore scores a pair of atoms with the Lennard-Jones
306  # potential. Finally, a PairsRestraint is used which simply applies the
307  # LennardJonesPairScore to each pair in the ClosePairContainer.
308  nbl = IMP.container.ClosePairContainer(cont, 4.0)
309  nbl.add_pair_filter(nonbonded_pair_filter)
310  # should weight the ff restraint by a temperature, set to 300K.
311  pr = IMP.container.PairsRestraint(pairscore, nbl)
312  rs = IMP.RestraintSet(m, 1.0 / (kB * ff_temp), 'phys')
313  rs.add_restraint(pr)
314  m.add_restraint(rs)
315  return prot, ff, rsb, rs
316 
317  def init_model_setup_scale(self, default, lower=None, upper=None):
318  """sets up a Scale particle to the initial default value. It can
319  optionnally be constrained between two positive bounds, or else its
320  range is 0 to infinity.
321  """
322  m = self._m
323  scale = IMP.isd.Scale.setup_particle(IMP.Particle(m), default)
324  if lower:
325  scale.set_lower(lower)
326  if upper:
327  scale.set_upper(upper)
328  return scale
329 
330  def init_model_jeffreys_kappa(self, scales, prior_rs=None):
331  """given a list of scales, returns a RestraintSet('prior') with weight
332  1.0 that contains a list of vonMisesKappaJeffreysRestraint on each scale.
333  If argument prior_rs is used, add them to that RestraintSet instead.
334  """
335  if not prior_rs:
336  prior_rs = IMP.RestraintSet('prior')
337  self._m.add_restraint(prior_rs)
338  prior_rs.set_weight(1.0)
339  for i in scales:
340  prior_rs.add_restraint(IMP.isd.vonMisesKappaJeffreysRestraint(i))
341  return prior_rs
342 
343  def init_model_jeffreys(self, scales, prior_rs=None):
344  """given a list of scales, returns a RestraintSet('prior') with weight
345  1.0 that contains a list of JeffreysRestraint on each scale.
346  If argument prior_rs is used, add them to that RestraintSet instead.
347  """
348  if not prior_rs:
349  prior_rs = IMP.RestraintSet(self._m, 1.0, 'prior')
350  self._m.add_restraint(prior_rs)
351  for i in scales:
352  jr = IMP.isd.JeffreysRestraint(self._m, i)
353  prior_rs.add_restraint(jr)
354  return prior_rs
355 
356  def init_model_conjugate_kappa(self, scales, c, R, prior_rs=None):
357  """given a list of scales, returns a RestraintSet('prior') with weight
358  1.0 that contains a list of vonMisesKappaConjugateRestraint on each
359  scale. If argument prior_rs is used, add them to that RestraintSet
360  instead.
361  """
362  if not (0 <= R <= c):
363  raise ValueError("parameters R and c should satisfy 0 <= R <= c")
364  if not prior_rs:
365  prior_rs = IMP.RestraintSet(self._m, 1.0, 'prior')
366  self._m.add_restraint(prior_rs)
367  for i in scales:
368  prior_rs.add_restraint(
370  return prior_rs
371 
372  def find_atom(self, atom, prot):
373  """scans the prot hierarchy and tries to find atom = (resno, name)
374  assumes that resno follows the same numbering as the sequence.
375  Stores already found atoms for increased speed.
376  """
377  if not hasattr(self, '__memoized'):
378  self.__memoized = {prot: {}}
379  try:
380  return self.__memoized[prot][atom]
381  except:
382  pass
383  try:
384  sel = IMP.atom.Selection(hierarchy=prot,
385  residue_index=atom[0],
386  atom_type=IMP.atom.AtomType(atom[1])
387  ).get_selected_particles()
388  if len(sel) > 1:
389  print("found multiple atoms for atom %d %s!" % atom)
390  return
391  p0 = IMP.core.XYZ(sel[0])
392  except:
393  print("atom %d %s not found" % atom)
394  return
395  self.__memoized[prot][atom] = p0
396  return p0
397 
398  def init_model_vonMises_restraint_full(self, atoms, data, kappa):
399  """
400  Sets up a vonMises torsion angle restraint using the given kappa
401  particle as concentration parameter. Returns the restraint.
402  data is a list of observations.
403  """
404  return IMP.isd.TALOSRestraint(atoms, data, kappa)
405 
406  def init_model_vonMises_restraint_mean(self, prot, atoms, data, kappa):
407  """
408  Sets up a vonMises torsion angle restraint using the given kappa
409  particle as concentration parameter. Returns the restraint.
410  data is (mean, standard deviation).
411  """
412  raise NotImplementedError
413 
414  def init_model_NOE_restraint(self, prot, atoms, distance, sigma, gamma):
415  """
416  Sets up a lognormal distance restraint using the given sigma and gamma.
417  Returns the restraint.
418  assumes atoms = (atom1, atom2)
419  where atom1 is (resno, atomname) and resno is the residue sequence
420  number.
421  """
422  # find corresponding atoms
423  p0 = self.find_atom(atoms[0], prot)
424  p1 = self.find_atom(atoms[1], prot)
425  # create lognormal restraint using gamma_data = 1
427  self._m,
428  p0,
429  p1,
430  sigma,
431  gamma,
432  distance ** (-6))
433  return ln
434 
435  def init_model_ambiguous_NOE_restraint(self, prot, contributions, distance,
436  sigma, gamma):
437  """Reads an ambiguous NOE restraint. contributions is a list of
438  (atom1, atom2) pairs, where atom1 is (resno, atomname). Sets up a
439  lognormal distance restraint using the given sigma and gamma.
440  Returns the restraint.
441  """
442  # create pairs list
443  pairs = [(self.find_atom(i, prot).get_particle(),
444  self.find_atom(j, prot).get_particle()) for (i, j) in
445  contributions]
446  pairs = IMP.container.ListPairContainer(pairs)
447  # create restraint
449  pairs, sigma, gamma, distance ** (-6))
450  return ln
451 
453  self, prot, seqfile, tblfile, name='NOE', prior_rs=None,
454  bounds_sigma=(1.0, 0.1, 100), bounds_gamma=(1.0, 0.1, 100),
455  verbose=True, sequence_match=(1, 1)):
456  """read TBL file and store NOE restraints, using one sigma and one gamma
457  for the whole dataset. Creates the necessary uninformative priors.
458  - prot: protein hierarchy
459  - seqfile: a file with 3-letter sequence
460  - tblfile: a TBL file with the restraints
461  - name: an optional name for the restraintset
462  - prior_rs: when not None, add new sigma and gamma to this
463  RestraintSet instance.
464  - bounds_sigma or gamma: tuple of (initial value, lower, upper bound)
465  bounds can be -1 to set to default range [0,+inf]
466  - verbose: be verbose (default True)
467  - sequence_match : (noe_start, sequence_start)
468  Returns: data_rs, prior_rs, sigma, gamma
469  """
470  # prior
471  if verbose:
472  print("Prior for the NOE Scales")
473  sigma = self.init_model_setup_scale(*bounds_sigma)
474  gamma = self.init_model_setup_scale(*bounds_gamma)
475  prior_rs = self.init_model_jeffreys([sigma, gamma], prior_rs)
476  # likelihood
477  rs = IMP.RestraintSet(self._m, 1.0, name)
478  # use the TBLReader to parse the TBL file.
479  sequence = IMP.isd.utils.read_sequence_file(seqfile,
480  first_residue_number=sequence_match[1])
481  tblr = IMP.isd.TBLReader.TBLReader(sequence,
482  sequence_match=sequence_match)
483  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
484  for i, restraint in enumerate(restraints):
485  if verbose and i % 100 == 0:
486  print("\r%d" % i, end=' ')
487  sys.stdout.flush()
488  # a restraint is (contributions, dist, upper, lower, volume)
489  # where contributions is a tuple of contributing pairs
490  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
491  if len(restraint[0]) > 1:
493  prot, restraint[0],
494  restraint[1], sigma, gamma)
495  else:
496  ln = self.init_model_NOE_restraint(prot, restraint[0][0],
497  restraint[1], sigma, gamma)
498  rs.add_restraint(ln)
499  if verbose:
500  print("\r%d NOE restraints read" % i)
501  # set weight of rs and add to model.
502  # Weight is 1.0 cause sigma particle already has this role.
503  self._m.add_restraint(rs)
504  return rs, prior_rs, sigma, gamma
505 
506  def init_model_NOEs_marginal(self, prot, seqfile, tblfile, name='NOE',
507  verbose=True, sequence_match=(1, 1)):
508  """read TBL file and store NOE restraints, using the marginal of the
509  lognormal with one sigma and one gamma, for the whole dataset.
510  - prot: protein hierarchy
511  - seqfile: a file with 3-letter sequence
512  - tblfile: a TBL file with the restraints
513  - name: an optional name for the restraintset
514  - verbose: be verbose (default True)
515  - sequence_match : (noe_start, sequence_start)
516  Returns: data_rs
517  """
518  # likelihood
519  rs = IMP.RestraintSet(self._m, 1.0, name)
520  # use the TBLReader to parse the TBL file.
521  sequence = IMP.isd.utils.read_sequence_file(seqfile,
522  first_residue_number=sequence_match[1])
523  tblr = IMP.isd.TBLReader.TBLReader(sequence,
524  sequence_match=sequence_match)
525  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
527  for i, restraint in enumerate(restraints):
528  if verbose and i % 100 == 0:
529  print("\r%d" % i, end=' ')
530  sys.stdout.flush()
531  # a restraint is (contributions, dist, upper, lower, volume)
532  # where contributions is a tuple of contributing pairs
533  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
534  # residue numbers start at 0
535  pairs = [(self.find_atom(i, prot).get_particle(),
536  self.find_atom(j, prot).get_particle()) for (i, j) in
537  restraint[0]]
538  pairs = IMP.container.ListPairContainer(pairs)
539  ln.add_contribution(pairs, restraint[4])
540  rs.add_restraint(ln)
541  if verbose:
542  print("\r%d NOE contributions added" % (len(restraints)))
543  self._m.add_restraint(rs)
544  return rs
545 
546  def init_model_HBonds_marginal(self, prot, seqfile, tblfile, name='NOE',
547  verbose=True):
548  """read TBL file and store lognormal restraints, using the marginal of the
549  lognormal with one sigma and gamma=1, for the whole dataset.
550  - prot: protein hierarchy
551  - seqfile: a file with 3-letter sequence
552  - tblfile: a TBL file with the restraints
553  - name: an optional name for the restraintset
554  - verbose: be verbose (default True)
555  Returns: data_rs
556  """
557  # likelihood
558  rs = IMP.RestraintSet(self._m, 1.0, name)
559  # use the TBLReader to parse the TBL file.
560  sequence = IMP.isd.utils.read_sequence_file(seqfile)
561  tblr = IMP.isd.TBLReader.TBLReader(sequence)
562  restraints = tblr.read_distances(tblfile, 'HBond')['HBond']
564  for i, restraint in enumerate(restraints):
565  if verbose and i % 100 == 0:
566  print("\r%d" % i, end=' ')
567  sys.stdout.flush()
568  # a restraint is (contributions, dist, upper, lower, volume)
569  # where contributions is a tuple of contributing pairs
570  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
571  # residue numbers start at 0
572  pairs = [(self.find_atom(i, prot).get_particle(),
573  self.find_atom(j, prot).get_particle()) for (i, j) in
574  restraint[0]]
575  pairs = IMP.container.ListPairContainer(pairs)
576  ln.add_contribution(pairs, restraint[4])
577  rs.add_restraint(ln)
578  if verbose:
579  print("\r%d Hbond contributions added" % (len(restraints)))
580  self._m.add_restraint(rs)
581  return rs
582 
583  def init_model_TALOS(self, prot, seqfile, talos_data, fulldata=True,
584  sequence_match=(1, 1), name='TALOS', prior_rs=None,
585  bounds_kappa=(1.0, 0.1, 10), verbose=True, prior='jeffreys',
586  keep_all=False):
587  """read TALOS dihedral angle data, and create restraints for phi/psi
588  torsion angles, along with the prior for kappa, which is a scale for the
589  whole dataset, compare to 1/sigma**2 in the NOE case.
590  - prot: protein hierarchy
591  - seqfile: a file with 3-letter sequence
592  - talos_data: either a file (pred.tab), or a folder (pred/) in which all
593  files in the form res???.tab can be found. If possible,
594  try to provide the folder, as statistics are more
595  accurate in that case.
596  - fulldata : either True or False, whether the data is the full TALOS
597  output (predAll.tab or pred/ folder), or just the averages
598  (pred.tab)
599  - sequence_match : (talos_no, sequence_no) to adjust for different
600  residue numberings
601  - name: an optional name for the restraintset
602  - prior_rs: when not None, add new kappa(s) to this RestraintSet instance.
603  - bounds_kappa: tuple of (initial value, lower, upper bound)
604  bounds can be -1 to set to default range [0,+inf]
605  - verbose: be verbose (default True)
606  - prior: either 'jeffreys' or a tuple (R,c), which signifies to use the
607  conjugate prior of the von Mises restraint, with parameters R
608  and c. Good values are R=0 and c=10. Default: jeffreys prior.
609  - keep_all: in case of a folder for 'talos_data', whether to keep
610  candidates marked as 'outliers' by TALOS, or to include them.
611  Returns: data_rs, prior_rs, kappa
612 
613  """
614  # prior
615  if verbose:
616  print("Prior for von Mises Kappa")
617  kappa = self.init_model_setup_scale(*bounds_kappa)
618  prior_rs = self.init_model_jeffreys_kappa([kappa], prior_rs)
619  # likelihood
620  if verbose:
621  print("reading data")
622  rs = IMP.RestraintSet(self._m, 1.0, name)
623  sequence = IMP.isd.utils.read_sequence_file(seqfile,
624  first_residue_no=sequence_match[1])
625  if fulldata:
626  talosr = IMP.isd.TALOSReader.TALOSReader(sequence, True, keep_all,
627  sequence_match=sequence_match)
628  if os.path.isdir(talos_data):
629  # using pred/res???.tab files
630  for i, res in enumerate(glob(os.path.join(talos_data, 'res???.tab'))):
631  if verbose and i % 100:
632  print("\r%d" % i, end=' ')
633  sys.stdout.flush()
634  talosr.read(res)
635  else:
636  # using predAll.tab file
637  talosr.read(talos_data)
638  else:
639  # using pred.tab file and correcting for estimates
640  talosr = IMP.isd.TALOSReader.TALOSReader(sequence, False, keep_all,
641  sequence_match=sequence_match)
642  talosr.read(talos_data)
643  # get harvested data and create restraints
644  data = talosr.get_data()
645  if verbose:
646  print("\rread dihedral data for %d residues" % len(data))
647  print("creating restraints")
648  avgR = []
649  for resno, datum in data.items():
650  phidata = datum['phi']
651  psidata = datum['psi']
652  num = datum['num']
653  res = IMP.atom.Residue(IMP.atom.get_residue(prot, resno))
656  if fulldata:
658  phi, phidata, kappa)
659  rs.add_restraint(r)
660  if verbose:
661  avgR.append(r.get_R0())
663  psi, psidata, kappa)
664  rs.add_restraint(r)
665  if verbose:
666  avgR.append(r.get_R0())
667  else:
669  phi, phidata, kappa)
670  rs.add_restraint(r)
671  if verbose:
672  avgR.append(r.get_R0())
674  psi, psidata, kappa)
675  rs.add_restraint(r)
676  if verbose:
677  avgR.append(r.get_R0())
678  if verbose:
679  print("%s Restraints created. Average R0: %f" % \
680  (len(avgR), sum(avgR) / float(len(avgR))))
681  self._m.add_restraint(rs)
682  return rs, prior_rs, kappa
683 
685  self, prot, profilefile, name='SAXS',
686  ff_type=IMP.saxs.HEAVY_ATOMS):
687  """read experimental SAXS profile and apply restraint the standard
688  way (like foxs)
689  Returns: a restraintset and the experimental profile
690  """
691  rs = IMP.RestraintSet(self._m, 1.0, name)
692  saxs_profile = IMP.saxs.Profile(profilefile)
693  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
694  saxs_restraint = IMP.saxs.Restraint(particles, saxs_profile, ff_type)
695  rs.add_restraint(saxs_restraint)
696  self._m.add_restraint(rs)
697  return rs, saxs_profile
698 
699  def _setup_md(self, prot, temperature=300.0, thermostat='berendsen',
700  coupling=500, md_restraints=None, timestep=1.0, recenter=1000,
701  momentum=1000):
702  """setup molecular dynamics
703  - temperature: target temperature
704  - thermostat: one of 'NVE', rescale_velocities',
705  'berendsen', 'langevin'
706  - coupling: coupling constant (tau (fs) for berendsen,
707  gamma (/fs) for langevin)
708  - md_restraints: if not None, specify the terms of the energy to be used
709  during the md steps via a list of restraints.
710  - timestep: in femtoseconds.
711  - recenter: recenter the molecule every so many steps (Langevin only)
712  - momentum: remove angular momentum every so many steps (Berendsen only)
713  Returns: an instance of md and an instance of an OptimizerState (the
714  thermostat), or None if NVE.
715  """
716  # Molecular Dynamics (from MAX BONOMI)
717  md = IMP.atom.MolecularDynamics(self._m)
718  md.assign_velocities(temperature)
719  md.set_time_step(timestep)
720  if thermostat == 'NVE':
721  os = None
722  elif thermostat == 'rescale_velocities':
724  IMP.atom.get_leaves(prot), temperature)
725  md.add_optimizer_state(os)
726  elif thermostat == 'berendsen':
728  IMP.atom.get_leaves(prot), temperature, coupling)
729  md.add_optimizer_state(os)
731  IMP.atom.get_leaves(prot), momentum)
732  md.add_optimizer_state(mom)
733  elif thermostat == 'langevin':
735  IMP.atom.get_leaves(prot), temperature, coupling)
736  md.add_optimizer_state(os)
737  # cen = IMP.atom.RemoveTranslationOptimizerState(
738  # IMP.atom.get_leaves(prot), recenter)
739  # md.add_optimizer_state(cen)
740  else:
741  raise NotImplementedError(thermostat)
742 
743  if md_restraints:
744  md.set_restraints(md_restraints)
745 
746  return md, os
747 
748  def _setup_normal_mover(self, particle, floatkey, stepsize):
749  """setup NormalMover to move particle's floatkey attribute
750  by a gaussian with standard deviation 'stepsize'
751  Returns: mover instance.
752  """
753  nm = IMP.core.NormalMover(self._m, particle.get_particle_index(),
754  IMP.FloatKeys([floatkey]), stepsize)
755  return nm
756 
757  def _setup_md_mover(self, md, particles, temperature, n_md_steps=10):
758  """setup MDMover using md and particles.
759  - md: md instance
760  - particles: particles to move, usually the leaves of the protein
761  hierarchy
762  - n_md_steps: number of md steps to perform on each move
763  Returns: mover instance.
764  """
766  cont.add_particles(particles)
767  return IMP.atom.MDMover(cont, md, temperature, n_md_steps)
768 
769  def _setup_mc(self, mover, temperature=300.0, mc_restraints=None):
770  """setup monte carlo using a certain mover.
771  - mover: mover to use, NormalMover or MDMover usually.
772  - temperature: target temperature.
773  - mc_restraints: if not None, list of restraints for the metropolis
774  criterion.
775  Returns: mc instance.
776  """
777  mc = IMP.core.MonteCarlo(self._m)
778  # why is this returning an int?
779  mc.add_mover(mover)
780  # set same temp as MD, careful with units
781  mc.set_kt(kB * temperature)
782  # allow to go uphill
783  mc.set_return_best(False)
784  if mc_restraints:
785  # tell mc to only use restraints involving the particle
786  mc.set_restraints(mc_restraints)
787  return mc
788 
789  def init_simulation_setup_protein_hmc_hopper(self, prot, temperature=300.0,
790  gamma=0.01, n_md_steps=10,
791  md_restraints=None, mc_restraints=None, timestep=1.0,
792  sd_threshold=0.0, sd_stepsize=0.01, sd_maxsteps=100):
793  """setup hybrid monte-carlo on protein. Uses basin hopping with steepest
794  descent minimization.
795  - prot: protein hierarchy.
796  - temperature: target temperature.
797  - gamma: coupling constant for langevin (/fs)
798  - n_md_steps: number of md steps per mc step
799  - md_restraints: if not None, specify the terms of the energy to be used
800  during the md steps.
801  - mc_restraints: if not None, use these energy terms for the metropolis
802  criterion.
803  - timestep: time step for md, in femtoseconds.
804  - sd_threshold: stop steepest descent after energy difference drops
805  below that threshold
806  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
807  - sd_maxsteps: maximum number of steps for steepest descent
808  Returns: hmc, mdmover, md and OptimizerState (thermostat)
809  """
810  raise NotImplementedError
811  md, os = self._setup_md(prot, temperature=temperature,
812  thermostat='langevin', coupling=gamma,
813  md_restraints=md_restraints, timestep=timestep)
814  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
815  mdmover = self._setup_md_mover(md, particles, temperature, n_md_steps)
816  hmc = self._setup_mc(mdmover, temperature, mc_restraints)
817  sd = IMP.core.SteepestDescent(self._m)
818  sd.set_threshold(sd_threshold)
819  sd.set_step_size(sd_stepsize)
820  hmc.set_local_optimizer(sd)
821  hmc.set_local_steps(sd_maxsteps)
822  hmc.set_use_basin_hopping(True)
823  return hmc, mdmover, md, os
824 
825  def init_simulation_setup_protein_hmc_nve(self, prot, temperature=300.0,
826  n_md_steps=100, md_restraints=None, mc_restraints=None,
827  timestep=1.0):
828  """setup hybrid monte-carlo on protein. Uses NVE MD and tries the full
829  - prot: protein hierarchy.
830  - temperature: target temperature.
831  - coupling: coupling constant (tau (fs) for berendsen,
832  gamma (/fs) for langevin)
833  - n_md_steps: number of md steps per mc step
834  - md_restraints: if not None, specify the terms of the energy to be used
835  during the md steps.
836  - mc_restraints: if not None, use these energy terms for the metropolis
837  criterion.
838  - timestep: time step for md, in femtoseconds.
839  - sd_threshold: stop steepest descent after energy difference drops
840  below that threshold
841  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
842  - sd_maxsteps: maximum number of steps for steepest descent
843  Returns: hmc, mdmover and md
844  """
845  prot = self._p['prot']
846  md, os = self._setup_md(prot, temperature=temperature,
847  thermostat='NVE', coupling=None, timestep=1.0,
848  md_restraints=md_restraints)
849  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
851  cont.add_particles(particles)
852  mdmover = PyMDMover(cont, md, n_md_steps)
853  mdmover.m = self._m
854  #mdmover = IMP.atom.MDMover(cont, md, temperature, n_md_steps)
855  mc = PyMC(self._m)
856  #mc = IMP.core.MonteCarlo(self._m)
857  # why is this returning an int?
858  mc.add_mover(mdmover)
859  # set same temp as MD, careful with units
860  mc.set_kt(kB * temperature)
861  # allow to go uphill
862  mc.set_return_best(False)
863  # update all particles each time
864  mc.set_move_probability(1.0)
865  return mc, mdmover, md
866 
867  def init_simulation_setup_nuisance_mc(self, nuis, temperature=300.0,
868  mc_restraints=None, nm_stepsize=0.1):
869  """sets up monte carlo on nuisance, at a certain target temperature,
870  optionnally using a certain set of restraints only.
871  - nuis: nuisance particle
872  - temperature: target temperature
873  - mc_restraints: optional set of restraints from which the energy should
874  be drawn instead of the energy of the complete system.
875  - floatkey: the floatkey to move.
876  - nm_stepsize: the stepsize of the normal mover
877  Returns: mc instance, nm instance.
878  """
879  nm = self._setup_normal_mover(nuis, nuis.get_nuisance_key(),
880  nm_stepsize)
881  mc = self._setup_mc(nm, temperature, mc_restraints)
882  return mc, nm
883 
884  def _mc_and_update_nm(self, nsteps, mc, nm, stats_key,
885  adjust_stepsize=True):
886  """run mc using a normal mover on a single particle,
887  update stepsize and store nsteps, acceptance and stepsize in the
888  statistics instance self.stat by using the key stats_key.
889  """
890  # do the monte carlo
891  mc.optimize(nsteps)
892  naccept = mc.get_number_of_forward_steps()
893  # increment the counter for this simulation
894  self.stat.increment_counter(stats_key, nsteps)
895  # get the acceptance rate, stepsize
896  accept = float(naccept) / nsteps
897  self.stat.update(stats_key, 'acceptance', accept)
898  stepsize = nm.get_sigma()
899  self.stat.update(stats_key, 'stepsize', stepsize)
900  # update stepsize if needed and requested
901  if adjust_stepsize:
902  if 0.4 < accept < 0.6:
903  return
904  if accept < 0.05:
905  accept = 0.05
906  if accept > 1.0:
907  accept = 1.0
908  nm.set_sigma(stepsize * 2 * accept)
909 
910  def _hmc_and_update_md(self, nsteps, hmc, mv, stats_key,
911  adjust_stepsize=True):
912  """run hmc, update stepsize and print statistics. Updates number of MD
913  steps to reach a target acceptance between 0.4 and 0.6, sends
914  statistics to self.stat. MD steps are always at least 10 and at most 500.
915  """
916  hmc.optimize(nsteps)
917  naccept = hmc.get_number_of_forward_steps()
918  self.stat.increment_counter(stats_key, nsteps)
919  accept = float(naccept) / nsteps
920  self.stat.update(stats_key, 'acceptance', accept)
921  mdsteps = mv.get_number_of_steps()
922  self.stat.update(stats_key, 'n_md_steps', mdsteps)
923  if adjust_stepsize:
924  if 0.4 < accept < 0.6:
925  return
926  mdsteps = int(mdsteps * 2 ** (accept - 0.5))
927  if mdsteps > 500:
928  mdsteps = 500
929  if mdsteps < 10:
930  mdsteps = 10
931  mv.set_nsteps(mdsteps)
932 
933  def get_pdb(self, prot):
934  """returns a string corresponding to the pdb structure of hierarchy
935  prot.
936  """
937  output = StringIO()
938  IMP.atom.write_pdb(prot, output)
939  return output.getvalue()
940 
941  def get_netcdf(self, prot):
942  raise NotImplementedError
943 
944  def do_md_protein_statistics(self, md_key, nsteps, md_instance,
945  temperature=300.0, prot_coordinates=None):
946  """updates statistics for md simulation: target temp, kinetic energy,
947  kinetic temperature, writes coordinates and increments counter.
948  - md_key: stats md key
949  - nsteps: number of steps performed.
950  - md_instance: instance of the MolecularDynamics class.
951  - temperature: target temperature
952  - prot_coordinates: protein coordinates to be passed to the stats class,
953  (should be a string)
954  """
955  self.stat.update(md_key, 'target_temp', temperature)
956  kinetic = md_instance.get_kinetic_energy()
957  self.stat.update(md_key, 'E_kinetic', kinetic)
958  self.stat.update(md_key, 'instant_temp',
959  md_instance.get_kinetic_temperature(kinetic))
960  self.stat.update_coordinates(md_key, 'protein', prot_coordinates)
961  self.stat.increment_counter(md_key, nsteps)
962 
963  def init_stats_add_mc_category(self, stat, name='mc', coord='particle'):
964  """shortcut for a frequent series of operations on MC simulations'
965  statistics. Creates an entry for acceptance, stepsize and one
966  coordinate set printed in the statistics file.
967  """
968  # create category
969  mc_key = stat.add_category(name=name)
970  # giving None as argument is a way to create a static entry.
971  stat.add_entry(mc_key, entry=Entry('temperature', '%10G', None))
972  stat.add_entry(mc_key, entry=Entry('acceptance', '%10G', None))
973  stat.add_entry(mc_key, entry=Entry('stepsize', '%10G', None))
974  # special call to add coordinates to be dumped
975  stat.add_entry(mc_key, entry=Entry(coord, '%10G', None))
976  # add the counter to the output
977  stat.add_entry(mc_key, name='counter')
978  return mc_key
979 
980  def init_stats_add_md_category(self, stat, name='md', coord='protein'):
981  """shortcut for a frequent series of operations on MD simulations'
982  statistics. Creates an entry for target temp, instantaneous temp,
983  kinetic energy, and one set of coordinates called 'protein' by
984  default.
985  """
986  # create category
987  md_key = stat.add_category(name=name)
988  # giving None as argument is a way to create a static entry.
989  stat.add_entry(md_key, entry=Entry('target_temp', '%10G', None))
990  stat.add_entry(md_key, entry=Entry('instant_temp', '%10G', None))
991  stat.add_entry(md_key, entry=Entry('E_kinetic', '%10G', None))
992  # special call to add coordinates to be dumped
993  stat.add_coordinates(md_key, coord)
994  # add the counter to the output
995  stat.add_entry(md_key, name='counter')
996  return md_key
997 
998  def init_stats_add_hmc_category(self, stat, name='hmc', coord='protein'):
999  """shortcut for a frequent series of operations on HMC simulations'
1000  statistics. Adds acceptance, number of MD steps and a trajectory for
1001  a protein.
1002  """
1003  # create category
1004  hmc_key = stat.add_category(name=name)
1005  # giving None as argument is a way to create a static entry.
1006  stat.add_entry(hmc_key, entry=Entry('temperature', '%10G', None))
1007  stat.add_entry(hmc_key, entry=Entry('acceptance', '%10G', None))
1008  stat.add_entry(hmc_key, entry=Entry('n_md_steps', '%10G', None))
1009  stat.add_entry(hmc_key, entry=Entry('E_kinetic', '%10G', None))
1010  # special call to add coordinates to be dumped
1011  stat.add_coordinates(hmc_key, coord)
1012  # add the counter to the output
1013  stat.add_entry(hmc_key, name='counter')
1014  return hmc_key
1015 
1016  def rescale_velocities(self, particles, factor):
1017  """rescale the velocities of a bunch of particles having vx vy and vz
1018  floatkeys
1019  """
1020  keys = [IMP.FloatKey("vx"), IMP.FloatKey("vy"), IMP.FloatKey("vz")]
1021  for p in particles:
1022  for k in keys:
1023  p.set_value(k, p.get_value(k) * factor)
Applies a SingletonScore to each Singleton in a list.
def rescale_velocities
rescale the velocities of a bunch of particles having vx vy and vz floatkeys
CHARMMParameters * get_heavy_atom_CHARMM_parameters()
A Monte Carlo optimizer.
Definition: MonteCarlo.h:45
def init_model_ambiguous_NOE_restraint
Reads an ambiguous NOE restraint.
Maintains temperature during molecular dynamics.
Enforce CHARMM stereochemistry on the given Hierarchy.
Atoms get_phi_dihedral_atoms(Residue rd)
Return the atoms comprising the phi dihedral.
void write_pdb(const Selection &mhd, base::TextOutput out, unsigned int model=1)
def do_md_protein_statistics
updates statistics for md simulation: target temp, kinetic energy, kinetic temperature, writes coordinates and increments counter.
def init_model_charmm_protein_and_ff
creates a CHARMM protein representation.
def find_atom
scans the prot hierarchy and tries to find atom = (resno, name) assumes that resno follows the same n...
def init_model_NOEs
read TBL file and store NOE restraints, using one sigma and one gamma for the whole dataset...
Various classes to hold sets of particles.
def init_simulation_setup_nuisance_mc
sets up monte carlo on nuisance, at a certain target temperature, optionnally using a certain set of ...
A decorator for a particle which has bonds.
void set_log_level(LogLevel l)
Set the current global log level.
def init_simulation_setup_protein_hmc_nve
setup hybrid monte-carlo on protein.
Calculate score based on fit to SAXS profile.
void set_check_level(CheckLevel tf)
Control runtime checks in the code.
Definition: exception.h:73
Object used to hold a set of restraints.
A simple steepest descent optimizer.
def init_model_TALOS
read TALOS dihedral angle data, and create restraints for phi/psi torsion angles, along with the prio...
static Scale setup_particle(kernel::Model *m, ParticleIndex pi)
Definition: Scale.h:30
Ambiguous NOE distance restraint between a number of pairs of particles.
The type of an atom.
def get_pdb
returns a string corresponding to the pdb structure of hierarchy prot.
Hierarchy get_residue(Hierarchy mhd, unsigned int index)
Get the residue with the specified index.
def init_model_base
moves to wd and creates model
def init_model_NOEs_marginal
read TBL file and store NOE restraints, using the marginal of the lognormal with one sigma and one ga...
Return all close unordered pairs of particles taken from the SingletonContainer.
Classes to handle ISD statistics files.
Definition: Entry.py:1
def init_model_NOE_restraint
Sets up a lognormal distance restraint using the given sigma and gamma.
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
CHARMM force field parameters.
Bond create_custom_bond(Bonded a, Bonded b, Float length, Float stiffness=-1)
Connect the two wrapped particles by a custom bond.
Removes rigid translation and rotation from the particles.
def init_model_vonMises_restraint_full
Sets up a vonMises torsion angle restraint using the given kappa particle as concentration parameter...
Store a kernel::ParticleIndexPairs.
Conjugate prior for the concentration parameter of a von Mises distribution.
Maintains temperature during molecular dynamics by velocity scaling.
static Bonded setup_particle(kernel::Model *m, ParticleIndex pi)
def init_model_jeffreys_kappa
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of vonMi...
Simple molecular dynamics optimizer.
Store a kernel::ParticleIndexes.
Atoms get_psi_dihedral_atoms(Residue rd)
Return the atoms comprising the psi dihedral.
def init_stats_add_md_category
shortcut for a frequent series of operations on MD simulations' statistics.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
def init_model_HBonds_marginal
read TBL file and store lognormal restraints, using the marginal of the lognormal with one sigma and ...
Apply an NOE distance restraint between two particles.
Definition: NOERestraint.h:23
def init_stats_add_mc_category
shortcut for a frequent series of operations on MC simulations' statistics.
def init_simulation_setup_protein_hmc_hopper
setup hybrid monte-carlo on protein.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class to handle individual model particles.
phi/psi dihedral restraint between four particles, using data from TALOS.
Score the bond based on a UnaryFunction,.
def read_sequence_file
read sequence of ONE chain, 1-letter or 3-letter, returns dict of no:3-letter code.
Definition: utils.py:322
Base class for all optimizers.
reads a TALOS file, or a TALOS folder, and stores the data
Definition: TALOSReader.py:14
Modify a set of continuous variables using a normal distribution.
Definition: NormalMover.h:20
nonspecific methods used across all shared function objects.
A decorator for a residue.
Definition: Residue.h:134
Apply a lognormal distance restraint between two particles.
Basic functionality that is expected to be used by a wide variety of IMP users.
def init_model_standard_SAXS_restraint
read experimental SAXS profile and apply restraint the standard way (like foxs) Returns: a restraints...
def init_model_setup_scale
sets up a Scale particle to the initial default value.
def init_model_jeffreys
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of Jeffr...
A filter that excludes bonds, angles and dihedrals.
def init_model_vonMises_restraint_mean
Sets up a vonMises torsion angle restraint using the given kappa particle as concentration parameter...
def init_stats_add_hmc_category
shortcut for a frequent series of operations on HMC simulations' statistics.
def m
wrapper to call methods of m
Apply an NOE distance restraint between two particles.
Functionality for loading, creating, manipulating and scoring atomic structures.
void read_pdb(base::TextInput input, int model, Hierarchy h)
CHARMMParameters * get_all_atom_CHARMM_parameters()
Miscellaneous utilities.
Definition: utils.py:1
Hierarchies get_leaves(const Selection &h)
Classes to handle TALOS files or folders.
Definition: TALOSReader.py:1
Select hierarchy particles identified by the biological name.
Definition: Selection.h:62
Applies a PairScore to each Pair in a list.
Support for small angle X-ray scattering (SAXS) data.
def init_model_conjugate_kappa
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of vonMi...
Class for storing model, its restraints, constraints, and particles.
Definition: kernel/Model.h:73
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
Classes to handle TBL files.
Definition: TBLReader.py:1
Harmonic function (symmetric about the mean)
Definition: core/Harmonic.h:24
Maintains temperature during molecular dynamics.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27