IMP  2.2.1
The Integrative Modeling Platform
shared_functions.py
1 #!/usr/bin/env python
2 import sys
3 import os
4 from glob import glob
5 
6 import IMP
7 import IMP.atom
8 import IMP.container
9 import IMP.saxs
10 import IMP.core
11 import IMP.isd
12 import IMP.isd.TBLReader
14 import IMP.isd.utils
15 from IMP.isd.Entry import Entry
16 
17 from math import sqrt, exp
18 from StringIO import StringIO
19 
20 from random import random
21 
22 kB = (1.381 * 6.02214) / 4184.0
23 
24 
25 class PyMDMover:
26 
27  def __init__(self, cont, md, n_md_steps):
28  self.cont = cont
29  self.md = md
30  self.n_md_steps = n_md_steps
31  self.velkeys = [IMP.FloatKey("vx"),
32  IMP.FloatKey("vy"), IMP.FloatKey("vz")]
33 
34  def store_move(self):
35  self.oldcoords = []
36  # self.oldvel=[]
37  for i in xrange(self.cont.get_number_of_particles()):
38  p = self.cont.get_particle(i)
39  #self.oldvel.append([p.get_value(k) for k in self.velkeys])
40  d = IMP.core.XYZ(p)
41  self.oldcoords.append(d.get_coordinates())
42 
43  def propose_move(self, prob):
44  self.md.optimize(self.n_md_steps)
45 
46  def reset_move(self):
47  for i in xrange(self.cont.get_number_of_particles()):
48  p = self.cont.get_particle(i)
49  # for j,k in enumerate(self.velkeys):
50  # p.set_value(k, self.oldvel[i][j])
51  d = IMP.core.XYZ(p)
52  d.set_coordinates(self.oldcoords[i])
53 
54  def get_number_of_steps(self):
55  return self.n_md_steps
56 
57  def set_number_of_steps(self, nsteps):
58  self.n_md_steps = nsteps
59 
60 
61 class PyMC(IMP.Optimizer):
62  debug = True
63 
64  def __init__(self, model):
65  self.m = model
66 
67  def add_mover(self, mv):
68  self.mv = mv
69 
70  def set_kt(self, kT):
71  self.kT = kT
72 
73  def set_return_best(self, thing):
74  pass
75 
76  def set_move_probability(self, thing):
77  pass
78 
79  def get_energy(self):
80  pot = self.m.evaluate(False)
81  # pot=self.get_restraints().evaluate(False)
82  kin = self.mv.md.get_kinetic_energy()
83  return pot + kin
84 
85  def metropolis(self, old, new):
86  deltaE = new - old
87  if self.debug:
88  print ": old %f new %f deltaE %f new_epot: %f" % (old, new, deltaE,
89  self.m.evaluate(
90  False)),
91  kT = self.kT
92  if deltaE < 0:
93  return True
94  else:
95  return exp(-deltaE / kT) > random()
96 
97  def optimize(self, nsteps):
98  self.naccept = 0
99  print "=== new MC call"
100  # store initial coordinates
101  self.mv.store_move()
102  for i in xrange(nsteps):
103  print "MC step %d " % i,
104  # draw new velocities
105  self.mv.md.assign_velocities(self.kT / kB)
106  # get total energy
107  old = self.get_energy()
108  # make a MD move
109  self.mv.propose_move(1)
110  # get new total energy
111  new = self.get_energy()
112  if self.metropolis(old, new):
113  # move was accepted: store new conformation
114  self.mv.store_move()
115  self.naccept += 1
116  print "accepted "
117  else:
118  # move rejected: restore old conformation
119  self.mv.reset_move()
120  print " "
121 
122  def get_number_of_forward_steps(self):
123  return self.naccept
124 
125 
127 
128  """nonspecific methods used across all shared function objects.
129  Rules:
130  - Their name starts with the name of the parent function (e.g.
131  init_model_* )
132  - they don't store anything in the class, but instead
133  return all created objects.
134  Exceptions: the model, which is self._m
135  the statistics class, which is self.stat
136  - they store what they have to store in the model (e.g. restraints)
137  - they don't print out anything except for long routines (e.g. NOE
138  parsing)
139  - the prior RestraintSet is added to the model when it is created, so
140  that when it is passed to another function, it is not added twice.
141  """
142 
143  def hello(self):
144  return "hello world"
145 
146  def set_checklevel(self, value):
148 
149  def set_loglevel(self, value):
151 
152  def m(self, name, *args, **kw):
153  "wrapper to call methods of m"
154  func = getattr(self._m, name)
155  return func(*args, **kw)
156 
157  def init_model_base(self, wd):
158  "moves to wd and creates model"
159  os.chdir(wd)
160  # Create an IMP model
161  self._m = IMP.Model()
162 
164  self, initpdb, top, par, selector, pairscore,
165  ff_temp=300.0, disulfides=None, representation='custom'):
166  """creates a CHARMM protein representation.
167  creates the charmm force field, bonded and nonbonded.
168  - initpdb: initial structure in pdb format
169  - top is a CHARMM top.lib, read if representation=='custom' (default)
170  - par is a CHARMM par.lib
171  - selector is an instance of
172  one of the selectors of IMP.atom, for example
173  IMP.atom.NonWaterNonHydrogenPDBSelector().
174  - pairscore is an instance of a Pair Score to score the interaction
175  between two atoms. usually, it's either
176  LennardJonesDistancePairScore(0,1) or
177  RepulsiveDistancePairScore(0,1)
178  - ff_temp is the temperature at which the force field should be
179  simulated.
180  - disulfides: if not None, a list of tuples corresponding to residue
181  numbers that should be cross-linked. Residues should be
182  cysteines, and residue numbering should start at 0.
183  - representation: 'full' : all-atom CHARMM force field
184  'heavy': heavy atom CHARMM forcefield with polar H
185  'calpha': only C alphas, ball and stick model with
186  bondlength 3.78 angstrom, beads at VdW
187  contact, and harmonic restraint between
188  them.
189  'custom' (default) : read given CHARMM top and par
190  files.
191  Returns: prot, ff, rsb, rs
192  - prot: the protein
193  - ff: the force field
194  - rsb: the RestraintSet on bonded interactions
195  - rs: the RestraintSet on nonbonded interactions. Both are weighted
196  using ff_temp.
197  """
198  m = self._m
199  prot = IMP.atom.read_pdb(initpdb, m, selector)
200  if not prot.get_is_valid(True):
201  raise ValueError("invalid hierarchy!")
202  if representation == 'custom':
203  # Read in the CHARMM heavy atom topology and parameter files
204  ff = IMP.atom.CHARMMParameters(top, par)
205  elif representation == 'heavy':
207  elif representation == 'full':
209  elif representation == 'calpha':
210  pass
211  else:
212  raise NotImplementedError(representation)
213  if representation == 'calpha':
214  print "setting up simplified C alpha force field"
215  # set radii
216  for ca in IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE):
217  IMP.core.XYZR(ca.get_particle()).set_radius(1.89)
218  # IMP.atom.Mass(ca.get_particle()).set_mass(1.)
219  # create bonds by getting pairs and applying a pairscore
220  pairs = []
221  for chain in prot.get_children():
222  residues = [(chain.get_child(i), chain.get_child(i + 1))
223  for i in xrange(chain.get_number_of_children() - 1)]
224  residues = [(i.get_child(0).get_particle(),
225  j.get_child(0).get_particle())
226  for (i, j) in residues]
227  pairs.extend(residues)
228  bonds = []
229  for (i, j) in pairs:
230  # check, because it fails if you try twice
232  bi = IMP.atom.Bonded(i)
233  else:
236  bi = IMP.atom.Bonded(i)
237  else:
239  bond = IMP.atom.create_custom_bond(bi, bj, 3.78, 10) # stiff
240  bonds.append(bond)
241  bonds_container = IMP.container.ListSingletonContainer(bonds)
242  hdps = IMP.core.Harmonic(0, 1)
243  bs = IMP.atom.BondSingletonScore(hdps)
244  br = IMP.container.SingletonsRestraint(bs, bonds_container)
245  rsb = IMP.RestraintSet("bonded")
246  rsb.add_restraint(br)
247  rsb.set_weight(1.0 / (kB * ff_temp))
248  m.add_restraint(rsb)
249  nonbonded_pair_filter = IMP.atom.StereochemistryPairFilter()
250  nonbonded_pair_filter.set_bonds(bonds)
251  ff = None
252  else:
253  print "setting up CHARMM forcefield"
254  #
255  # Using the CHARMM libraries, determine the ideal topology (atoms and their
256  # connectivity) for the PDB file's primary sequence
257  topology = ff.create_topology(prot)
258  # Typically this modifies the C and N termini of each chain in the protein by
259  # applying the CHARMM CTER and NTER patches. Patches can also be manually
260  # applied at this point, e.g. to add disulfide bridges.
261  topology.apply_default_patches()
262  # disulfides
263  if disulfides:
264  s = topology.get_segment(0)
265  dis = ff.get_patch('DISU')
266  for (i, j) in disulfides:
267  self.find_atom((i, 'SG'), prot)
268  self.find_atom((j, 'SG'), prot)
269  r0 = s.get_residue(i)
270  r1 = s.get_residue(j)
271  if i == 0:
272  r0.set_patched(False)
273  if j == 0:
274  r1.set_patched(False)
275  dis.apply(r0, r1)
276  print "added disulfide bridge between cysteines %d and %d" % (i, j)
277  # Make the PDB file conform with the topology; i.e. if it contains extra
278  # atoms that are not in the CHARMM topology file, remove them; if it is
279  # missing atoms (e.g. sidechains, hydrogens) that are in the CHARMM topology,
280  # add them and construct their Cartesian coordinates from internal coordinate
281  # information.
282  topology.setup_hierarchy(prot)
283  # Set up and evaluate the stereochemical part (bonds, angles, dihedrals,
284  # impropers) of the CHARMM forcefield
285  r = IMP.atom.CHARMMStereochemistryRestraint(prot, topology)
286  rsb = IMP.RestraintSet(m, 1.0, "bonded")
287  rsb.add_restraint(r)
288  m.add_restraint(rsb)
289  #
290  # Add non-bonded interaction (in this case, Lennard-Jones). This needs to
291  # know the radii and well depths for each atom, so add them from the forcefield
292  # (they can also be assigned manually using the XYZR or LennardJones
293  # decorators):
294  ff.add_radii(prot)
295  ff.add_well_depths(prot)
296  nonbonded_pair_filter = r.get_pair_filter()
297  # Get a list of all atoms in the protein, and put it in a container
298  atoms = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
300  # Add a restraint for the Lennard-Jones interaction. This is built from
301  # a collection of building blocks. First, a ClosePairContainer maintains a list
302  # of all pairs of Particles that are close. Next, all 1-2, 1-3 and 1-4 pairs
303  # from the stereochemistry created above are filtered out.
304  # Then, a LennardJonesPairScore scores a pair of atoms with the Lennard-Jones
305  # potential. Finally, a PairsRestraint is used which simply applies the
306  # LennardJonesPairScore to each pair in the ClosePairContainer.
307  nbl = IMP.container.ClosePairContainer(cont, 4.0)
308  nbl.add_pair_filter(nonbonded_pair_filter)
309  # should weight the ff restraint by a temperature, set to 300K.
310  pr = IMP.container.PairsRestraint(pairscore, nbl)
311  rs = IMP.RestraintSet(m, 1.0 / (kB * ff_temp), 'phys')
312  rs.add_restraint(pr)
313  m.add_restraint(rs)
314  return prot, ff, rsb, rs
315 
316  def init_model_setup_scale(self, default, lower=None, upper=None):
317  """sets up a Scale particle to the initial default value. It can
318  optionnally be constrained between two positive bounds, or else its
319  range is 0 to infinity.
320  """
321  m = self._m
322  scale = IMP.isd.Scale.setup_particle(IMP.Particle(m), default)
323  if lower:
324  scale.set_lower(lower)
325  if upper:
326  scale.set_upper(upper)
327  return scale
328 
329  def init_model_jeffreys_kappa(self, scales, prior_rs=None):
330  """given a list of scales, returns a RestraintSet('prior') with weight
331  1.0 that contains a list of vonMisesKappaJeffreysRestraint on each scale.
332  If argument prior_rs is used, add them to that RestraintSet instead.
333  """
334  if not prior_rs:
335  prior_rs = IMP.RestraintSet('prior')
336  self._m.add_restraint(prior_rs)
337  prior_rs.set_weight(1.0)
338  for i in scales:
339  prior_rs.add_restraint(IMP.isd.vonMisesKappaJeffreysRestraint(i))
340  return prior_rs
341 
342  def init_model_jeffreys(self, scales, prior_rs=None):
343  """given a list of scales, returns a RestraintSet('prior') with weight
344  1.0 that contains a list of JeffreysRestraint on each scale.
345  If argument prior_rs is used, add them to that RestraintSet instead.
346  """
347  if not prior_rs:
348  prior_rs = IMP.RestraintSet(self._m, 1.0, 'prior')
349  self._m.add_restraint(prior_rs)
350  for i in scales:
351  jr = IMP.isd.JeffreysRestraint(self._m, i)
352  prior_rs.add_restraint(jr)
353  return prior_rs
354 
355  def init_model_conjugate_kappa(self, scales, c, R, prior_rs=None):
356  """given a list of scales, returns a RestraintSet('prior') with weight
357  1.0 that contains a list of vonMisesKappaConjugateRestraint on each
358  scale. If argument prior_rs is used, add them to that RestraintSet
359  instead.
360  """
361  if not (0 <= R <= c):
362  raise ValueError("parameters R and c should satisfy 0 <= R <= c")
363  if not prior_rs:
364  prior_rs = IMP.RestraintSet(self._m, 1.0, 'prior')
365  self._m.add_restraint(prior_rs)
366  for i in scales:
367  prior_rs.add_restraint(
369  return prior_rs
370 
371  def find_atom(self, atom, prot):
372  """scans the prot hierarchy and tries to find atom = (resno, name)
373  assumes that resno follows the same numbering as the sequence.
374  Stores already found atoms for increased speed.
375  """
376  if not hasattr(self, '__memoized'):
377  self.__memoized = {prot: {}}
378  try:
379  return self.__memoized[prot][atom]
380  except:
381  pass
382  try:
383  sel = IMP.atom.Selection(hierarchy=prot,
384  residue_index=atom[0],
385  atom_type=IMP.atom.AtomType(atom[1])
386  ).get_selected_particles()
387  if len(sel) > 1:
388  print "found multiple atoms for atom %d %s!" % atom
389  return
390  p0 = IMP.core.XYZ(sel[0])
391  except:
392  print "atom %d %s not found" % atom
393  return
394  self.__memoized[prot][atom] = p0
395  return p0
396 
397  def init_model_vonMises_restraint_full(self, atoms, data, kappa):
398  """
399  Sets up a vonMises torsion angle restraint using the given kappa
400  particle as concentration parameter. Returns the restraint.
401  data is a list of observations.
402  """
403  return IMP.isd.TALOSRestraint(atoms, data, kappa)
404 
405  def init_model_vonMises_restraint_mean(self, prot, atoms, data, kappa):
406  """
407  Sets up a vonMises torsion angle restraint using the given kappa
408  particle as concentration parameter. Returns the restraint.
409  data is (mean, standard deviation).
410  """
411  raise NotImplementedError
412 
413  def init_model_NOE_restraint(self, prot, atoms, distance, sigma, gamma):
414  """
415  Sets up a lognormal distance restraint using the given sigma and gamma.
416  Returns the restraint.
417  assumes atoms = (atom1, atom2)
418  where atom1 is (resno, atomname) and resno is the residue sequence
419  number.
420  """
421  # find corresponding atoms
422  p0 = self.find_atom(atoms[0], prot)
423  p1 = self.find_atom(atoms[1], prot)
424  # create lognormal restraint using gamma_data = 1
426  self._m,
427  p0,
428  p1,
429  sigma,
430  gamma,
431  distance ** (-6))
432  return ln
433 
434  def init_model_ambiguous_NOE_restraint(self, prot, contributions, distance,
435  sigma, gamma):
436  """Reads an ambiguous NOE restraint. contributions is a list of
437  (atom1, atom2) pairs, where atom1 is (resno, atomname). Sets up a
438  lognormal distance restraint using the given sigma and gamma.
439  Returns the restraint.
440  """
441  # create pairs list
442  pairs = [(self.find_atom(i, prot).get_particle(),
443  self.find_atom(j, prot).get_particle()) for (i, j) in
444  contributions]
445  pairs = IMP.container.ListPairContainer(pairs)
446  # create restraint
448  pairs, sigma, gamma, distance ** (-6))
449  return ln
450 
452  self, prot, seqfile, tblfile, name='NOE', prior_rs=None,
453  bounds_sigma=(1.0, 0.1, 100), bounds_gamma=(1.0, 0.1, 100),
454  verbose=True, sequence_match=(1, 1)):
455  """read TBL file and store NOE restraints, using one sigma and one gamma
456  for the whole dataset. Creates the necessary uninformative priors.
457  - prot: protein hierarchy
458  - seqfile: a file with 3-letter sequence
459  - tblfile: a TBL file with the restraints
460  - name: an optional name for the restraintset
461  - prior_rs: when not None, add new sigma and gamma to this
462  RestraintSet instance.
463  - bounds_sigma or gamma: tuple of (initial value, lower, upper bound)
464  bounds can be -1 to set to default range [0,+inf]
465  - verbose: be verbose (default True)
466  - sequence_match : (noe_start, sequence_start)
467  Returns: data_rs, prior_rs, sigma, gamma
468  """
469  # prior
470  if verbose:
471  print "Prior for the NOE Scales"
472  sigma = self.init_model_setup_scale(*bounds_sigma)
473  gamma = self.init_model_setup_scale(*bounds_gamma)
474  prior_rs = self.init_model_jeffreys([sigma, gamma], prior_rs)
475  # likelihood
476  rs = IMP.RestraintSet(self._m, 1.0, name)
477  # use the TBLReader to parse the TBL file.
478  sequence = IMP.isd.utils.read_sequence_file(seqfile,
479  first_residue_number=sequence_match[1])
480  tblr = IMP.isd.TBLReader.TBLReader(sequence,
481  sequence_match=sequence_match)
482  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
483  for i, restraint in enumerate(restraints):
484  if verbose and i % 100 == 0:
485  print "\r%d" % i,
486  sys.stdout.flush()
487  # a restraint is (contributions, dist, upper, lower, volume)
488  # where contributions is a tuple of contributing pairs
489  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
490  if len(restraint[0]) > 1:
492  prot, restraint[0],
493  restraint[1], sigma, gamma)
494  else:
495  ln = self.init_model_NOE_restraint(prot, restraint[0][0],
496  restraint[1], sigma, gamma)
497  rs.add_restraint(ln)
498  if verbose:
499  print "\r%d NOE restraints read" % i
500  # set weight of rs and add to model.
501  # Weight is 1.0 cause sigma particle already has this role.
502  self._m.add_restraint(rs)
503  return rs, prior_rs, sigma, gamma
504 
505  def init_model_NOEs_marginal(self, prot, seqfile, tblfile, name='NOE',
506  verbose=True, sequence_match=(1, 1)):
507  """read TBL file and store NOE restraints, using the marginal of the
508  lognormal with one sigma and one gamma, for the whole dataset.
509  - prot: protein hierarchy
510  - seqfile: a file with 3-letter sequence
511  - tblfile: a TBL file with the restraints
512  - name: an optional name for the restraintset
513  - verbose: be verbose (default True)
514  - sequence_match : (noe_start, sequence_start)
515  Returns: data_rs
516  """
517  # likelihood
518  rs = IMP.RestraintSet(self._m, 1.0, name)
519  # use the TBLReader to parse the TBL file.
520  sequence = IMP.isd.utils.read_sequence_file(seqfile,
521  first_residue_number=sequence_match[1])
522  tblr = IMP.isd.TBLReader.TBLReader(sequence,
523  sequence_match=sequence_match)
524  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
526  for i, restraint in enumerate(restraints):
527  if verbose and i % 100 == 0:
528  print "\r%d" % i,
529  sys.stdout.flush()
530  # a restraint is (contributions, dist, upper, lower, volume)
531  # where contributions is a tuple of contributing pairs
532  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
533  # residue numbers start at 0
534  pairs = [(self.find_atom(i, prot).get_particle(),
535  self.find_atom(j, prot).get_particle()) for (i, j) in
536  restraint[0]]
537  pairs = IMP.container.ListPairContainer(pairs)
538  ln.add_contribution(pairs, restraint[4])
539  rs.add_restraint(ln)
540  if verbose:
541  print "\r%d NOE contributions added" % (len(restraints))
542  self._m.add_restraint(rs)
543  return rs
544 
545  def init_model_HBonds_marginal(self, prot, seqfile, tblfile, name='NOE',
546  verbose=True):
547  """read TBL file and store lognormal restraints, using the marginal of the
548  lognormal with one sigma and gamma=1, for the whole dataset.
549  - prot: protein hierarchy
550  - seqfile: a file with 3-letter sequence
551  - tblfile: a TBL file with the restraints
552  - name: an optional name for the restraintset
553  - verbose: be verbose (default True)
554  Returns: data_rs
555  """
556  # likelihood
557  rs = IMP.RestraintSet(self._m, 1.0, name)
558  # use the TBLReader to parse the TBL file.
559  sequence = IMP.isd.utils.read_sequence_file(seqfile)
560  tblr = IMP.isd.TBLReader.TBLReader(sequence)
561  restraints = tblr.read_distances(tblfile, 'HBond')['HBond']
563  for i, restraint in enumerate(restraints):
564  if verbose and i % 100 == 0:
565  print "\r%d" % i,
566  sys.stdout.flush()
567  # a restraint is (contributions, dist, upper, lower, volume)
568  # where contributions is a tuple of contributing pairs
569  # and a pair is (c1, c2), where c1 is of the form (resno, atname)
570  # residue numbers start at 0
571  pairs = [(self.find_atom(i, prot).get_particle(),
572  self.find_atom(j, prot).get_particle()) for (i, j) in
573  restraint[0]]
574  pairs = IMP.container.ListPairContainer(pairs)
575  ln.add_contribution(pairs, restraint[4])
576  rs.add_restraint(ln)
577  if verbose:
578  print "\r%d Hbond contributions added" % (len(restraints))
579  self._m.add_restraint(rs)
580  return rs
581 
582  def init_model_TALOS(self, prot, seqfile, talos_data, fulldata=True,
583  sequence_match=(1, 1), name='TALOS', prior_rs=None,
584  bounds_kappa=(1.0, 0.1, 10), verbose=True, prior='jeffreys',
585  keep_all=False):
586  """read TALOS dihedral angle data, and create restraints for phi/psi
587  torsion angles, along with the prior for kappa, which is a scale for the
588  whole dataset, compare to 1/sigma**2 in the NOE case.
589  - prot: protein hierarchy
590  - seqfile: a file with 3-letter sequence
591  - talos_data: either a file (pred.tab), or a folder (pred/) in which all
592  files in the form res???.tab can be found. If possible,
593  try to provide the folder, as statistics are more
594  accurate in that case.
595  - fulldata : either True or False, whether the data is the full TALOS
596  output (predAll.tab or pred/ folder), or just the averages
597  (pred.tab)
598  - sequence_match : (talos_no, sequence_no) to adjust for different
599  residue numberings
600  - name: an optional name for the restraintset
601  - prior_rs: when not None, add new kappa(s) to this RestraintSet instance.
602  - bounds_kappa: tuple of (initial value, lower, upper bound)
603  bounds can be -1 to set to default range [0,+inf]
604  - verbose: be verbose (default True)
605  - prior: either 'jeffreys' or a tuple (R,c), which signifies to use the
606  conjugate prior of the von Mises restraint, with parameters R
607  and c. Good values are R=0 and c=10. Default: jeffreys prior.
608  - keep_all: in case of a folder for 'talos_data', whether to keep
609  candidates marked as 'outliers' by TALOS, or to include them.
610  Returns: data_rs, prior_rs, kappa
611 
612  """
613  # prior
614  if verbose:
615  print "Prior for von Mises Kappa"
616  kappa = self.init_model_setup_scale(*bounds_kappa)
617  prior_rs = self.init_model_jeffreys_kappa([kappa], prior_rs)
618  # likelihood
619  if verbose:
620  print "reading data"
621  rs = IMP.RestraintSet(self._m, 1.0, name)
622  sequence = IMP.isd.utils.read_sequence_file(seqfile,
623  first_residue_no=sequence_match[1])
624  if fulldata:
625  talosr = IMP.isd.TALOSReader.TALOSReader(sequence, True, keep_all,
626  sequence_match=sequence_match)
627  if os.path.isdir(talos_data):
628  # using pred/res???.tab files
629  for i, res in enumerate(glob(os.path.join(talos_data, 'res???.tab'))):
630  if verbose and i % 100:
631  print "\r%d" % i,
632  sys.stdout.flush()
633  talosr.read(res)
634  else:
635  # using predAll.tab file
636  talosr.read(talos_data)
637  else:
638  # using pred.tab file and correcting for estimates
639  talosr = IMP.isd.TALOSReader.TALOSReader(sequence, False, keep_all,
640  sequence_match=sequence_match)
641  talosr.read(talos_data)
642  # get harvested data and create restraints
643  data = talosr.get_data()
644  if verbose:
645  print "\rread dihedral data for %d residues" % len(data)
646  print "creating restraints"
647  avgR = []
648  for resno, datum in data.iteritems():
649  phidata = datum['phi']
650  psidata = datum['psi']
651  num = datum['num']
652  res = IMP.atom.Residue(IMP.atom.get_residue(prot, resno))
655  if fulldata:
657  phi, phidata, kappa)
658  rs.add_restraint(r)
659  if verbose:
660  avgR.append(r.get_R0())
662  psi, psidata, kappa)
663  rs.add_restraint(r)
664  if verbose:
665  avgR.append(r.get_R0())
666  else:
668  phi, phidata, kappa)
669  rs.add_restraint(r)
670  if verbose:
671  avgR.append(r.get_R0())
673  psi, psidata, kappa)
674  rs.add_restraint(r)
675  if verbose:
676  avgR.append(r.get_R0())
677  if verbose:
678  print "%s Restraints created. Average R0: %f" % \
679  (len(avgR), sum(avgR) / float(len(avgR)))
680  self._m.add_restraint(rs)
681  return rs, prior_rs, kappa
682 
684  self, prot, profilefile, name='SAXS',
685  ff_type=IMP.saxs.HEAVY_ATOMS):
686  """read experimental SAXS profile and apply restraint the standard
687  way (like foxs)
688  Returns: a restraintset and the experimental profile
689  """
690  rs = IMP.RestraintSet(self._m, 1.0, name)
691  saxs_profile = IMP.saxs.Profile(profilefile)
692  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
693  saxs_restraint = IMP.saxs.Restraint(particles, saxs_profile, ff_type)
694  rs.add_restraint(saxs_restraint)
695  self._m.add_restraint(rs)
696  return rs, saxs_profile
697 
698  def _setup_md(self, prot, temperature=300.0, thermostat='berendsen',
699  coupling=500, md_restraints=None, timestep=1.0, recenter=1000,
700  momentum=1000):
701  """setup molecular dynamics
702  - temperature: target temperature
703  - thermostat: one of 'NVE', rescale_velocities',
704  'berendsen', 'langevin'
705  - coupling: coupling constant (tau (fs) for berendsen,
706  gamma (/fs) for langevin)
707  - md_restraints: if not None, specify the terms of the energy to be used
708  during the md steps via a list of restraints.
709  - timestep: in femtoseconds.
710  - recenter: recenter the molecule every so many steps (Langevin only)
711  - momentum: remove angular momentum every so many steps (Berendsen only)
712  Returns: an instance of md and an instance of an OptimizerState (the
713  thermostat), or None if NVE.
714  """
715  # Molecular Dynamics (from MAX BONOMI)
716  md = IMP.atom.MolecularDynamics(self._m)
717  md.assign_velocities(temperature)
718  md.set_time_step(timestep)
719  if thermostat == 'NVE':
720  os = None
721  elif thermostat == 'rescale_velocities':
723  IMP.atom.get_leaves(prot), temperature)
724  md.add_optimizer_state(os)
725  elif thermostat == 'berendsen':
727  IMP.atom.get_leaves(prot), temperature, coupling)
728  md.add_optimizer_state(os)
730  IMP.atom.get_leaves(prot), momentum)
731  md.add_optimizer_state(mom)
732  elif thermostat == 'langevin':
734  IMP.atom.get_leaves(prot), temperature, coupling)
735  md.add_optimizer_state(os)
736  # cen = IMP.atom.RemoveTranslationOptimizerState(
737  # IMP.atom.get_leaves(prot), recenter)
738  # md.add_optimizer_state(cen)
739  else:
740  raise NotImplementedError(thermostat)
741 
742  if md_restraints:
743  md.set_restraints(md_restraints)
744 
745  return md, os
746 
747  def _setup_normal_mover(self, particle, floatkey, stepsize):
748  """setup NormalMover to move particle's floatkey attribute
749  by a gaussian with standard deviation 'stepsize'
750  Returns: mover instance.
751  """
752  nm = IMP.core.NormalMover(self._m, particle.get_particle_index(),
753  IMP.FloatKeys([floatkey]), stepsize)
754  return nm
755 
756  def _setup_md_mover(self, md, particles, temperature, n_md_steps=10):
757  """setup MDMover using md and particles.
758  - md: md instance
759  - particles: particles to move, usually the leaves of the protein
760  hierarchy
761  - n_md_steps: number of md steps to perform on each move
762  Returns: mover instance.
763  """
765  cont.add_particles(particles)
766  return IMP.atom.MDMover(cont, md, temperature, n_md_steps)
767 
768  def _setup_mc(self, mover, temperature=300.0, mc_restraints=None):
769  """setup monte carlo using a certain mover.
770  - mover: mover to use, NormalMover or MDMover usually.
771  - temperature: target temperature.
772  - mc_restraints: if not None, list of restraints for the metropolis
773  criterion.
774  Returns: mc instance.
775  """
776  mc = IMP.core.MonteCarlo(self._m)
777  # why is this returning an int?
778  mc.add_mover(mover)
779  # set same temp as MD, careful with units
780  mc.set_kt(kB * temperature)
781  # allow to go uphill
782  mc.set_return_best(False)
783  if mc_restraints:
784  # tell mc to only use restraints involving the particle
785  mc.set_restraints(mc_restraints)
786  return mc
787 
788  def init_simulation_setup_protein_hmc_hopper(self, prot, temperature=300.0,
789  gamma=0.01, n_md_steps=10,
790  md_restraints=None, mc_restraints=None, timestep=1.0,
791  sd_threshold=0.0, sd_stepsize=0.01, sd_maxsteps=100):
792  """setup hybrid monte-carlo on protein. Uses basin hopping with steepest
793  descent minimization.
794  - prot: protein hierarchy.
795  - temperature: target temperature.
796  - gamma: coupling constant for langevin (/fs)
797  - n_md_steps: number of md steps per mc step
798  - md_restraints: if not None, specify the terms of the energy to be used
799  during the md steps.
800  - mc_restraints: if not None, use these energy terms for the metropolis
801  criterion.
802  - timestep: time step for md, in femtoseconds.
803  - sd_threshold: stop steepest descent after energy difference drops
804  below that threshold
805  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
806  - sd_maxsteps: maximum number of steps for steepest descent
807  Returns: hmc, mdmover, md and OptimizerState (thermostat)
808  """
809  raise NotImplementedError
810  md, os = self._setup_md(prot, temperature=temperature,
811  thermostat='langevin', coupling=gamma,
812  md_restraints=md_restraints, timestep=timestep)
813  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
814  mdmover = self._setup_md_mover(md, particles, temperature, n_md_steps)
815  hmc = self._setup_mc(mdmover, temperature, mc_restraints)
816  sd = IMP.core.SteepestDescent(self._m)
817  sd.set_threshold(sd_threshold)
818  sd.set_step_size(sd_stepsize)
819  hmc.set_local_optimizer(sd)
820  hmc.set_local_steps(sd_maxsteps)
821  hmc.set_use_basin_hopping(True)
822  return hmc, mdmover, md, os
823 
824  def init_simulation_setup_protein_hmc_nve(self, prot, temperature=300.0,
825  n_md_steps=100, md_restraints=None, mc_restraints=None,
826  timestep=1.0):
827  """setup hybrid monte-carlo on protein. Uses NVE MD and tries the full
828  - prot: protein hierarchy.
829  - temperature: target temperature.
830  - coupling: coupling constant (tau (fs) for berendsen,
831  gamma (/fs) for langevin)
832  - n_md_steps: number of md steps per mc step
833  - md_restraints: if not None, specify the terms of the energy to be used
834  during the md steps.
835  - mc_restraints: if not None, use these energy terms for the metropolis
836  criterion.
837  - timestep: time step for md, in femtoseconds.
838  - sd_threshold: stop steepest descent after energy difference drops
839  below that threshold
840  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
841  - sd_maxsteps: maximum number of steps for steepest descent
842  Returns: hmc, mdmover and md
843  """
844  prot = self._p['prot']
845  md, os = self._setup_md(prot, temperature=temperature,
846  thermostat='NVE', coupling=None, timestep=1.0,
847  md_restraints=md_restraints)
848  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
850  cont.add_particles(particles)
851  mdmover = PyMDMover(cont, md, n_md_steps)
852  mdmover.m = self._m
853  #mdmover = IMP.atom.MDMover(cont, md, temperature, n_md_steps)
854  mc = PyMC(self._m)
855  #mc = IMP.core.MonteCarlo(self._m)
856  # why is this returning an int?
857  mc.add_mover(mdmover)
858  # set same temp as MD, careful with units
859  mc.set_kt(kB * temperature)
860  # allow to go uphill
861  mc.set_return_best(False)
862  # update all particles each time
863  mc.set_move_probability(1.0)
864  return mc, mdmover, md
865 
866  def init_simulation_setup_nuisance_mc(self, nuis, temperature=300.0,
867  mc_restraints=None, nm_stepsize=0.1):
868  """sets up monte carlo on nuisance, at a certain target temperature,
869  optionnally using a certain set of restraints only.
870  - nuis: nuisance particle
871  - temperature: target temperature
872  - mc_restraints: optional set of restraints from which the energy should
873  be drawn instead of the energy of the complete system.
874  - floatkey: the floatkey to move.
875  - nm_stepsize: the stepsize of the normal mover
876  Returns: mc instance, nm instance.
877  """
878  nm = self._setup_normal_mover(nuis, nuis.get_nuisance_key(),
879  nm_stepsize)
880  mc = self._setup_mc(nm, temperature, mc_restraints)
881  return mc, nm
882 
883  def _mc_and_update_nm(self, nsteps, mc, nm, stats_key,
884  adjust_stepsize=True):
885  """run mc using a normal mover on a single particle,
886  update stepsize and store nsteps, acceptance and stepsize in the
887  statistics instance self.stat by using the key stats_key.
888  """
889  # do the monte carlo
890  mc.optimize(nsteps)
891  naccept = mc.get_number_of_forward_steps()
892  # increment the counter for this simulation
893  self.stat.increment_counter(stats_key, nsteps)
894  # get the acceptance rate, stepsize
895  accept = float(naccept) / nsteps
896  self.stat.update(stats_key, 'acceptance', accept)
897  stepsize = nm.get_sigma()
898  self.stat.update(stats_key, 'stepsize', stepsize)
899  # update stepsize if needed and requested
900  if adjust_stepsize:
901  if 0.4 < accept < 0.6:
902  return
903  if accept < 0.05:
904  accept = 0.05
905  if accept > 1.0:
906  accept = 1.0
907  nm.set_sigma(stepsize * 2 * accept)
908 
909  def _hmc_and_update_md(self, nsteps, hmc, mv, stats_key,
910  adjust_stepsize=True):
911  """run hmc, update stepsize and print statistics. Updates number of MD
912  steps to reach a target acceptance between 0.4 and 0.6, sends
913  statistics to self.stat. MD steps are always at least 10 and at most 500.
914  """
915  hmc.optimize(nsteps)
916  naccept = hmc.get_number_of_forward_steps()
917  self.stat.increment_counter(stats_key, nsteps)
918  accept = float(naccept) / nsteps
919  self.stat.update(stats_key, 'acceptance', accept)
920  mdsteps = mv.get_number_of_steps()
921  self.stat.update(stats_key, 'n_md_steps', mdsteps)
922  if adjust_stepsize:
923  if 0.4 < accept < 0.6:
924  return
925  mdsteps = int(mdsteps * 2 ** (accept - 0.5))
926  if mdsteps > 500:
927  mdsteps = 500
928  if mdsteps < 10:
929  mdsteps = 10
930  mv.set_nsteps(mdsteps)
931 
932  def get_pdb(self, prot):
933  """returns a string corresponding to the pdb structure of hierarchy
934  prot.
935  """
936  output = StringIO()
937  IMP.atom.write_pdb(prot, output)
938  return output.getvalue()
939 
940  def get_netcdf(self, prot):
941  raise NotImplementedError
942 
943  def do_md_protein_statistics(self, md_key, nsteps, md_instance,
944  temperature=300.0, prot_coordinates=None):
945  """updates statistics for md simulation: target temp, kinetic energy,
946  kinetic temperature, writes coordinates and increments counter.
947  - md_key: stats md key
948  - nsteps: number of steps performed.
949  - md_instance: instance of the MolecularDynamics class.
950  - temperature: target temperature
951  - prot_coordinates: protein coordinates to be passed to the stats class,
952  (should be a string)
953  """
954  self.stat.update(md_key, 'target_temp', temperature)
955  kinetic = md_instance.get_kinetic_energy()
956  self.stat.update(md_key, 'E_kinetic', kinetic)
957  self.stat.update(md_key, 'instant_temp',
958  md_instance.get_kinetic_temperature(kinetic))
959  self.stat.update_coordinates(md_key, 'protein', prot_coordinates)
960  self.stat.increment_counter(md_key, nsteps)
961 
962  def init_stats_add_mc_category(self, stat, name='mc', coord='particle'):
963  """shortcut for a frequent series of operations on MC simulations'
964  statistics. Creates an entry for acceptance, stepsize and one
965  coordinate set printed in the statistics file.
966  """
967  # create category
968  mc_key = stat.add_category(name=name)
969  # giving None as argument is a way to create a static entry.
970  stat.add_entry(mc_key, entry=Entry('temperature', '%10G', None))
971  stat.add_entry(mc_key, entry=Entry('acceptance', '%10G', None))
972  stat.add_entry(mc_key, entry=Entry('stepsize', '%10G', None))
973  # special call to add coordinates to be dumped
974  stat.add_entry(mc_key, entry=Entry(coord, '%10G', None))
975  # add the counter to the output
976  stat.add_entry(mc_key, name='counter')
977  return mc_key
978 
979  def init_stats_add_md_category(self, stat, name='md', coord='protein'):
980  """shortcut for a frequent series of operations on MD simulations'
981  statistics. Creates an entry for target temp, instantaneous temp,
982  kinetic energy, and one set of coordinates called 'protein' by
983  default.
984  """
985  # create category
986  md_key = stat.add_category(name=name)
987  # giving None as argument is a way to create a static entry.
988  stat.add_entry(md_key, entry=Entry('target_temp', '%10G', None))
989  stat.add_entry(md_key, entry=Entry('instant_temp', '%10G', None))
990  stat.add_entry(md_key, entry=Entry('E_kinetic', '%10G', None))
991  # special call to add coordinates to be dumped
992  stat.add_coordinates(md_key, coord)
993  # add the counter to the output
994  stat.add_entry(md_key, name='counter')
995  return md_key
996 
997  def init_stats_add_hmc_category(self, stat, name='hmc', coord='protein'):
998  """shortcut for a frequent series of operations on HMC simulations'
999  statistics. Adds acceptance, number of MD steps and a trajectory for
1000  a protein.
1001  """
1002  # create category
1003  hmc_key = stat.add_category(name=name)
1004  # giving None as argument is a way to create a static entry.
1005  stat.add_entry(hmc_key, entry=Entry('temperature', '%10G', None))
1006  stat.add_entry(hmc_key, entry=Entry('acceptance', '%10G', None))
1007  stat.add_entry(hmc_key, entry=Entry('n_md_steps', '%10G', None))
1008  stat.add_entry(hmc_key, entry=Entry('E_kinetic', '%10G', None))
1009  # special call to add coordinates to be dumped
1010  stat.add_coordinates(hmc_key, coord)
1011  # add the counter to the output
1012  stat.add_entry(hmc_key, name='counter')
1013  return hmc_key
1014 
1015  def rescale_velocities(self, particles, factor):
1016  """rescale the velocities of a bunch of particles having vx vy and vz
1017  floatkeys
1018  """
1019  keys = [IMP.FloatKey("vx"), IMP.FloatKey("vy"), IMP.FloatKey("vz")]
1020  for p in particles:
1021  for k in keys:
1022  p.set_value(k, p.get_value(k) * factor)
Applies a SingletonScore to each Singleton in a list.
def rescale_velocities
rescale the velocities of a bunch of particles having vx vy and vz floatkeys
CHARMMParameters * get_heavy_atom_CHARMM_parameters()
A Monte Carlo optimizer.
Definition: MonteCarlo.h:45
def init_model_ambiguous_NOE_restraint
Reads an ambiguous NOE restraint.
Maintains temperature during molecular dynamics.
Enforce CHARMM stereochemistry on the given Hierarchy.
Atoms get_phi_dihedral_atoms(Residue rd)
void write_pdb(const Selection &mhd, base::TextOutput out, unsigned int model=1)
def do_md_protein_statistics
updates statistics for md simulation: target temp, kinetic energy, kinetic temperature, writes coordinates and increments counter.
def init_model_charmm_protein_and_ff
creates a CHARMM protein representation.
def find_atom
scans the prot hierarchy and tries to find atom = (resno, name) assumes that resno follows the same n...
def init_model_NOEs
read TBL file and store NOE restraints, using one sigma and one gamma for the whole dataset...
See IMP.container for more information.
def init_simulation_setup_nuisance_mc
sets up monte carlo on nuisance, at a certain target temperature, optionnally using a certain set of ...
A decorator for a particle which has bonds.
void set_log_level(LogLevel l)
Set the current global log level.
def init_simulation_setup_protein_hmc_nve
setup hybrid monte-carlo on protein.
Calculate score based on fit to SAXS profile.
void set_check_level(CheckLevel tf)
Control runtime checks in the code.
Object used to hold a set of restraints.
A simple steepest descent optimizer.
def init_model_TALOS
read TALOS dihedral angle data, and create restraints for phi/psi torsion angles, along with the prio...
static Scale setup_particle(kernel::Model *m, ParticleIndex pi)
Definition: Scale.h:30
Ambiguous NOE distance restraint between a number of pairs of particles.
The type of an atom.
def get_pdb
returns a string corresponding to the pdb structure of hierarchy prot.
Hierarchy get_residue(Hierarchy mhd, unsigned int index)
Get the residue with the specified index.
def init_model_base
moves to wd and creates model
def init_model_NOEs_marginal
read TBL file and store NOE restraints, using the marginal of the lognormal with one sigma and one ga...
Return all close unordered pairs of particles taken from the SingletonContainer.
Classes to handle ISD statistics files.
Definition: Entry.py:1
def init_model_NOE_restraint
Sets up a lognormal distance restraint using the given sigma and gamma.
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
CHARMM force field parameters.
Bond create_custom_bond(Bonded a, Bonded b, Float length, Float stiffness=-1)
Connect the two wrapped particles by a custom bond.
Removes rigid translation and rotation from the particles.
def init_model_vonMises_restraint_full
Sets up a vonMises torsion angle restraint using the given kappa particle as concentration parameter...
Store a kernel::ParticleIndexPairs.
Conjugate prior for the concentration parameter of a von Mises distribution.
Maintains temperature during molecular dynamics by velocity scaling.
static Bonded setup_particle(kernel::Model *m, ParticleIndex pi)
def init_model_jeffreys_kappa
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of vonMi...
Simple molecular dynamics optimizer.
Store a kernel::ParticleIndexes.
Atoms get_psi_dihedral_atoms(Residue rd)
def init_stats_add_md_category
shortcut for a frequent series of operations on MD simulations' statistics.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
def init_model_HBonds_marginal
read TBL file and store lognormal restraints, using the marginal of the lognormal with one sigma and ...
Apply an NOE distance restraint between two particles.
Definition: NOERestraint.h:23
def init_stats_add_mc_category
shortcut for a frequent series of operations on MC simulations' statistics.
def init_simulation_setup_protein_hmc_hopper
setup hybrid monte-carlo on protein.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class to handle individual model particles.
phi/psi dihedral restraint between four particles, using data from TALOS.
Score the bond based on a UnaryFunction,.
def read_sequence_file
read sequence of ONE chain, 1-letter or 3-letter, returns dict of no:3-letter code.
Definition: utils.py:318
Base class for all optimizers.
reads a TALOS file, or a TALOS folder, and stores the data
Definition: TALOSReader.py:13
Modify a set of continuous variables using a normal distribution.
Definition: NormalMover.h:20
nonspecific methods used across all shared function objects.
A decorator for a residue.
Definition: Residue.h:133
Apply a lognormal distance restraint between two particles.
See IMP.core for more information.
def init_model_standard_SAXS_restraint
read experimental SAXS profile and apply restraint the standard way (like foxs) Returns: a restraints...
def init_model_setup_scale
sets up a Scale particle to the initial default value.
def init_model_jeffreys
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of Jeffr...
A filter that excludes bonds, angles and dihedrals.
def init_model_vonMises_restraint_mean
Sets up a vonMises torsion angle restraint using the given kappa particle as concentration parameter...
def init_stats_add_hmc_category
shortcut for a frequent series of operations on HMC simulations' statistics.
def m
wrapper to call methods of m
Apply an NOE distance restraint between two particles.
See IMP.atom for more information.
void read_pdb(base::TextInput input, int model, Hierarchy h)
CHARMMParameters * get_all_atom_CHARMM_parameters()
Miscellaneous utilities.
Definition: utils.py:1
Hierarchies get_leaves(const Selection &h)
Classes to handle TALOS files or folders.
Definition: TALOSReader.py:1
Applies a PairScore to each Pair in a list.
See IMP.saxs for more information.
Definition: ChiFreeScore.h:14
def init_model_conjugate_kappa
given a list of scales, returns a RestraintSet('prior') with weight 1.0 that contains a list of vonMi...
Class for storing model, its restraints, constraints, and particles.
Definition: kernel/Model.h:72
See IMP.isd for more information.
Classes to handle TBL files.
Definition: TBLReader.py:1
Harmonic function (symmetric about the mean)
Definition: core/Harmonic.h:24
Maintains temperature during molecular dynamics.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27