IMP  2.1.1
The Integrative Modeling Platform
shared_functions.py
1 #!/usr/bin/env python
2 import sys,os
3 from glob import glob
4 
5 import IMP
6 import IMP.atom
7 import IMP.container
8 import IMP.saxs
9 import IMP.core
10 import IMP.isd
11 import IMP.isd.TBLReader
13 import IMP.isd.utils
14 from IMP.isd.Entry import Entry
15 
16 from math import sqrt,exp
17 from StringIO import StringIO
18 
19 from random import random
20 
21 kB= (1.381 * 6.02214) / 4184.0
22 
23 class PyMDMover:
24  def __init__(self, cont, md, n_md_steps):
25  self.cont = cont
26  self.md = md
27  self.n_md_steps = n_md_steps
28  self.velkeys=[IMP.FloatKey("vx"),
29  IMP.FloatKey("vy"), IMP.FloatKey("vz")]
30 
31  def store_move(self):
32  self.oldcoords=[]
33  #self.oldvel=[]
34  for i in xrange(self.cont.get_number_of_particles()):
35  p=self.cont.get_particle(i)
36  #self.oldvel.append([p.get_value(k) for k in self.velkeys])
37  d=IMP.core.XYZ(p)
38  self.oldcoords.append(d.get_coordinates())
39 
40  def propose_move(self, prob):
41  self.md.optimize(self.n_md_steps)
42 
43  def reset_move(self):
44  for i in xrange(self.cont.get_number_of_particles()):
45  p=self.cont.get_particle(i)
46  #for j,k in enumerate(self.velkeys):
47  # p.set_value(k, self.oldvel[i][j])
48  d=IMP.core.XYZ(p)
49  d.set_coordinates(self.oldcoords[i])
50 
51  def get_number_of_steps(self):
52  return self.n_md_steps
53 
54  def set_number_of_steps(self, nsteps):
55  self.n_md_steps = nsteps
56 
57 class PyMC(IMP.Optimizer):
58  debug =True
59 
60  def __init__(self,model):
61  self.m=model
62 
63  def add_mover(self,mv):
64  self.mv = mv
65 
66  def set_kt(self,kT):
67  self.kT=kT
68 
69  def set_return_best(self,thing):
70  pass
71  def set_move_probability(self,thing):
72  pass
73 
74  def get_energy(self):
75  pot=self.m.evaluate(False)
76  #pot=self.get_restraints().evaluate(False)
77  kin=self.mv.md.get_kinetic_energy()
78  return pot+kin
79 
80  def metropolis(self, old, new):
81  deltaE=new-old
82  if self.debug:
83  print ": old %f new %f deltaE %f new_epot: %f" % (old,new,deltaE,
84  self.m.evaluate(False)),
85  kT=self.kT
86  if deltaE < 0:
87  return True
88  else:
89  return exp(-deltaE/kT) > random()
90 
91  def optimize(self,nsteps):
92  self.naccept = 0
93  print "=== new MC call"
94  #store initial coordinates
95  self.mv.store_move()
96  for i in xrange(nsteps):
97  print "MC step %d " % i,
98  #draw new velocities
99  self.mv.md.assign_velocities(self.kT / kB)
100  #get total energy
101  old=self.get_energy()
102  #make a MD move
103  self.mv.propose_move(1)
104  #get new total energy
105  new=self.get_energy()
106  if self.metropolis(old,new):
107  #move was accepted: store new conformation
108  self.mv.store_move()
109  self.naccept += 1
110  print "accepted "
111  else:
112  #move rejected: restore old conformation
113  self.mv.reset_move()
114  print " "
115 
116  def get_number_of_forward_steps(self):
117  return self.naccept
118 
120  """nonspecific methods used across all shared function objects.
121  Rules:
122  - Their name starts with the name of the parent function (e.g.
123  init_model_* )
124  - they don't store anything in the class, but instead
125  return all created objects.
126  Exceptions: the model, which is self._m
127  the statistics class, which is self.stat
128  - they store what they have to store in the model (e.g. restraints)
129  - they don't print out anything except for long routines (e.g. NOE
130  parsing)
131  - the prior RestraintSet is added to the model when it is created, so
132  that when it is passed to another function, it is not added twice.
133  """
134 
135  def hello(self):
136  return "hello world"
137 
138  def set_checklevel(self,value):
140 
141  def set_loglevel(self,value):
143 
144  def m(self,name,*args,**kw):
145  "wrapper to call methods of m"
146  func=getattr(self._m,name)
147  return func(*args, **kw)
148 
149  def init_model_base(self, wd):
150  "moves to wd and creates model"
151  os.chdir(wd)
152  # Create an IMP model
153  self._m = IMP.Model()
154 
155  def init_model_charmm_protein_and_ff(self, initpdb, top, par, selector, pairscore,
156  ff_temp=300.0, disulfides=None, representation='custom'):
157  """creates a CHARMM protein representation.
158  creates the charmm force field, bonded and nonbonded.
159  - initpdb: initial structure in pdb format
160  - top is a CHARMM top.lib, read if representation=='custom' (default)
161  - par is a CHARMM par.lib
162  - selector is an instance of
163  one of the selectors of IMP.atom, for example
164  IMP.atom.NonWaterNonHydrogenPDBSelector().
165  - pairscore is an instance of a Pair Score to score the interaction
166  between two atoms. usually, it's either
167  LennardJonesDistancePairScore(0,1) or
168  RepulsiveDistancePairScore(0,1)
169  - ff_temp is the temperature at which the force field should be
170  simulated.
171  - disulfides: if not None, a list of tuples corresponding to residue
172  numbers that should be cross-linked. Residues should be
173  cysteines, and residue numbering should start at 0.
174  - representation: 'full' : all-atom CHARMM force field
175  'heavy': heavy atom CHARMM forcefield with polar H
176  'calpha': only C alphas, ball and stick model with
177  bondlength 3.78 angstrom, beads at VdW
178  contact, and harmonic restraint between
179  them.
180  'custom' (default) : read given CHARMM top and par
181  files.
182  Returns: prot, ff, rsb, rs
183  - prot: the protein
184  - ff: the force field
185  - rsb: the RestraintSet on bonded interactions
186  - rs: the RestraintSet on nonbonded interactions. Both are weighted
187  using ff_temp.
188  """
189  m=self._m
190  prot = IMP.atom.read_pdb(initpdb, m, selector)
191  if not prot.get_is_valid(True):
192  raise ValueError, "invalid hierarchy!"
193  if representation == 'custom':
194  # Read in the CHARMM heavy atom topology and parameter files
195  ff = IMP.atom.CHARMMParameters(top,par)
196  elif representation == 'heavy':
198  elif representation == 'full':
200  elif representation == 'calpha':
201  pass
202  else:
203  raise NotImplementedError, representation
204  if representation == 'calpha':
205  print "setting up simplified C alpha force field"
206  # set radii
207  for ca in IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE):
208  IMP.core.XYZR(ca.get_particle()).set_radius(1.89)
209  #IMP.atom.Mass(ca.get_particle()).set_mass(1.)
210  #create bonds by getting pairs and applying a pairscore
211  pairs=[]
212  for chain in prot.get_children():
213  residues=[(chain.get_child(i),chain.get_child(i+1))
214  for i in xrange(chain.get_number_of_children()-1)]
215  residues=[(i.get_child(0).get_particle(),
216  j.get_child(0).get_particle())
217  for (i,j) in residues]
218  pairs.extend(residues)
219  bonds=[]
220  for (i,j) in pairs:
221  #check, because it fails if you try twice
223  bi = IMP.atom.Bonded(i)
224  else:
227  bi = IMP.atom.Bonded(i)
228  else:
230  bond=IMP.atom.create_custom_bond(bi,bj,3.78,10) #stiff
231  bonds.append(bond)
232  bonds_container = IMP.container.ListSingletonContainer(bonds)
233  hdps = IMP.core.Harmonic(0,1)
234  bs = IMP.atom.BondSingletonScore(hdps)
235  br = IMP.container.SingletonsRestraint(bs, bonds_container)
236  rsb = IMP.RestraintSet("bonded")
237  rsb.add_restraint(br)
238  rsb.set_weight(1.0/(kB*ff_temp))
239  m.add_restraint(rsb)
240  nonbonded_pair_filter = IMP.atom.StereochemistryPairFilter()
241  nonbonded_pair_filter.set_bonds(bonds)
242  ff=None
243  else:
244  print "setting up CHARMM forcefield"
245  #
246  # Using the CHARMM libraries, determine the ideal topology (atoms and their
247  # connectivity) for the PDB file's primary sequence
248  topology = ff.create_topology(prot)
249  # Typically this modifies the C and N termini of each chain in the protein by
250  # applying the CHARMM CTER and NTER patches. Patches can also be manually
251  # applied at this point, e.g. to add disulfide bridges.
252  topology.apply_default_patches()
253  #disulfides
254  if disulfides:
255  s=topology.get_segment(0)
256  dis=ff.get_patch('DISU')
257  for (i,j) in disulfides:
258  self.find_atom((i, 'SG'), prot)
259  self.find_atom((j, 'SG'), prot)
260  r0=s.get_residue(i)
261  r1=s.get_residue(j)
262  if i==0:
263  r0.set_patched(False)
264  if j==0:
265  r1.set_patched(False)
266  dis.apply(r0,r1)
267  print "added disulfide bridge between cysteines %d and %d" % (i,j)
268  # Make the PDB file conform with the topology; i.e. if it contains extra
269  # atoms that are not in the CHARMM topology file, remove them; if it is
270  # missing atoms (e.g. sidechains, hydrogens) that are in the CHARMM topology,
271  # add them and construct their Cartesian coordinates from internal coordinate
272  # information.
273  topology.setup_hierarchy(prot)
274  # Set up and evaluate the stereochemical part (bonds, angles, dihedrals,
275  # impropers) of the CHARMM forcefield
276  r = IMP.atom.CHARMMStereochemistryRestraint(prot, topology)
277  rsb = IMP.RestraintSet(m,1.0,"bonded")
278  rsb.add_restraint(r)
279  m.add_restraint(rsb)
280  #
281  # Add non-bonded interaction (in this case, Lennard-Jones). This needs to
282  # know the radii and well depths for each atom, so add them from the forcefield
283  # (they can also be assigned manually using the XYZR or LennardJones
284  # decorators):
285  ff.add_radii(prot)
286  ff.add_well_depths(prot)
287  nonbonded_pair_filter = r.get_pair_filter()
288  # Get a list of all atoms in the protein, and put it in a container
289  atoms = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
291  # Add a restraint for the Lennard-Jones interaction. This is built from
292  # a collection of building blocks. First, a ClosePairContainer maintains a list
293  # of all pairs of Particles that are close. Next, all 1-2, 1-3 and 1-4 pairs
294  # from the stereochemistry created above are filtered out.
295  # Then, a LennardJonesPairScore scores a pair of atoms with the Lennard-Jones
296  # potential. Finally, a PairsRestraint is used which simply applies the
297  # LennardJonesPairScore to each pair in the ClosePairContainer.
298  nbl = IMP.container.ClosePairContainer(cont, 4.0)
299  nbl.add_pair_filter(nonbonded_pair_filter)
300  #should weight the ff restraint by a temperature, set to 300K.
301  pr = IMP.container.PairsRestraint(pairscore, nbl)
302  rs = IMP.RestraintSet(m,1.0/(kB*ff_temp),'phys')
303  rs.add_restraint(pr)
304  m.add_restraint(rs)
305  return prot, ff, rsb, rs
306 
307  def init_model_setup_scale(self, default, lower=None, upper=None):
308  """sets up a Scale particle to the initial default value. It can
309  optionnally be constrained between two positive bounds, or else its
310  range is 0 to infinity.
311  """
312  m=self._m
313  scale = IMP.isd.Scale.setup_particle(IMP.Particle(m),default)
314  if lower:
315  scale.set_lower(lower)
316  if upper:
317  scale.set_upper(upper)
318  return scale
319 
320  def init_model_jeffreys_kappa(self, scales, prior_rs=None):
321  """given a list of scales, returns a RestraintSet('prior') with weight
322  1.0 that contains a list of vonMisesKappaJeffreysRestraint on each scale.
323  If argument prior_rs is used, add them to that RestraintSet instead.
324  """
325  if not prior_rs:
326  prior_rs = IMP.RestraintSet('prior')
327  self._m.add_restraint(prior_rs)
328  prior_rs.set_weight(1.0)
329  for i in scales:
330  prior_rs.add_restraint(IMP.isd.vonMisesKappaJeffreysRestraint(i))
331  return prior_rs
332 
333  def init_model_jeffreys(self, scales, prior_rs=None):
334  """given a list of scales, returns a RestraintSet('prior') with weight
335  1.0 that contains a list of JeffreysRestraint on each scale.
336  If argument prior_rs is used, add them to that RestraintSet instead.
337  """
338  if not prior_rs:
339  prior_rs = IMP.RestraintSet(self._m,1.0,'prior')
340  self._m.add_restraint(prior_rs)
341  for i in scales:
342  jr=IMP.isd.JeffreysRestraint(self._m,i)
343  prior_rs.add_restraint(jr)
344  return prior_rs
345 
346  def init_model_conjugate_kappa(self, scales, c, R, prior_rs=None):
347  """given a list of scales, returns a RestraintSet('prior') with weight
348  1.0 that contains a list of vonMisesKappaConjugateRestraint on each
349  scale. If argument prior_rs is used, add them to that RestraintSet
350  instead.
351  """
352  if not (0 <= R <= c):
353  raise ValueError, "parameters R and c should satisfy 0 <= R <= c"
354  if not prior_rs:
355  prior_rs = IMP.RestraintSet(self._m,1.0,'prior')
356  self._m.add_restraint(prior_rs)
357  for i in scales:
358  prior_rs.add_restraint(IMP.isd.vonMisesKappaConjugateRestraint(i, c, R))
359  return prior_rs
360 
361  def find_atom(self, atom, prot):
362  """scans the prot hierarchy and tries to find atom = (resno, name)
363  assumes that resno follows the same numbering as the sequence.
364  Stores already found atoms for increased speed.
365  """
366  if not hasattr(self,'__memoized'):
367  self.__memoized={prot:{}}
368  try:
369  return self.__memoized[prot][atom]
370  except:
371  pass
372  try:
373  sel=IMP.atom.Selection(hierarchy=prot,
374  residue_index=atom[0],
375  atom_type=IMP.atom.AtomType(atom[1])
376  ).get_selected_particles()
377  if len(sel) > 1:
378  print "found multiple atoms for atom %d %s!" % atom
379  return
380  p0=IMP.core.XYZ(sel[0])
381  except:
382  print "atom %d %s not found" % atom
383  return
384  self.__memoized[prot][atom] = p0
385  return p0
386 
387  def init_model_vonMises_restraint_full(self, atoms, data, kappa):
388  """
389  Sets up a vonMises torsion angle restraint using the given kappa
390  particle as concentration parameter. Returns the restraint.
391  data is a list of observations.
392  """
393  return IMP.isd.TALOSRestraint(atoms,data,kappa)
394 
395  def init_model_vonMises_restraint_mean(self, prot, atoms, data, kappa):
396  """
397  Sets up a vonMises torsion angle restraint using the given kappa
398  particle as concentration parameter. Returns the restraint.
399  data is (mean, standard deviation).
400  """
401  raise NotImplementedError
402 
403  def init_model_NOE_restraint(self, prot, atoms, distance, sigma, gamma):
404  """
405  Sets up a lognormal distance restraint using the given sigma and gamma.
406  Returns the restraint.
407  assumes atoms = (atom1, atom2)
408  where atom1 is (resno, atomname) and resno is the residue sequence
409  number.
410  """
411  #find corresponding atoms
412  p0=self.find_atom(atoms[0], prot)
413  p1=self.find_atom(atoms[1], prot)
414  #create lognormal restraint using gamma_data = 1
415  ln = IMP.isd.NOERestraint(self._m,p0,p1,sigma,gamma,distance**(-6))
416  return ln
417 
418  def init_model_ambiguous_NOE_restraint(self, prot, contributions, distance,
419  sigma, gamma):
420  """Reads an ambiguous NOE restraint. contributions is a list of
421  (atom1, atom2) pairs, where atom1 is (resno, atomname). Sets up a
422  lognormal distance restraint using the given sigma and gamma.
423  Returns the restraint.
424  """
425  #create pairs list
426  pairs=[(self.find_atom(i, prot).get_particle(),
427  self.find_atom(j, prot).get_particle()) for (i,j) in
428  contributions]
429  pairs = IMP.container.ListPairContainer(pairs)
430  #create restraint
431  ln = IMP.isd.AmbiguousNOERestraint(pairs, sigma, gamma, distance**(-6))
432  return ln
433 
434  def init_model_NOEs(self, prot, seqfile, tblfile, name='NOE', prior_rs=None,
435  bounds_sigma=(1.0,0.1,100), bounds_gamma=(1.0,0.1,100),
436  verbose=True, sequence_match=(1,1)):
437  """read TBL file and store NOE restraints, using one sigma and one gamma
438  for the whole dataset. Creates the necessary uninformative priors.
439  - prot: protein hierarchy
440  - seqfile: a file with 3-letter sequence
441  - tblfile: a TBL file with the restraints
442  - name: an optional name for the restraintset
443  - prior_rs: when not None, add new sigma and gamma to this
444  RestraintSet instance.
445  - bounds_sigma or gamma: tuple of (initial value, lower, upper bound)
446  bounds can be -1 to set to default range [0,+inf]
447  - verbose: be verbose (default True)
448  - sequence_match : (noe_start, sequence_start)
449  Returns: data_rs, prior_rs, sigma, gamma
450  """
451  #prior
452  if verbose:
453  print "Prior for the NOE Scales"
454  sigma=self.init_model_setup_scale(*bounds_sigma)
455  gamma=self.init_model_setup_scale(*bounds_gamma)
456  prior_rs = self.init_model_jeffreys([sigma,gamma], prior_rs)
457  #likelihood
458  rs = IMP.RestraintSet(self._m,1.0,name)
459  #use the TBLReader to parse the TBL file.
460  sequence = IMP.isd.utils.read_sequence_file(seqfile,
461  first_residue_number=sequence_match[1])
462  tblr = IMP.isd.TBLReader.TBLReader(sequence,
463  sequence_match=sequence_match)
464  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
465  for i,restraint in enumerate(restraints):
466  if verbose and i % 100 == 0:
467  print "\r%d" % i,
468  sys.stdout.flush()
469  #a restraint is (contributions, dist, upper, lower, volume)
470  #where contributions is a tuple of contributing pairs
471  #and a pair is (c1, c2), where c1 is of the form (resno, atname)
472  if len(restraint[0]) > 1:
473  ln = self.init_model_ambiguous_NOE_restraint(prot, restraint[0],
474  restraint[1], sigma, gamma)
475  else:
476  ln = self.init_model_NOE_restraint(prot, restraint[0][0],
477  restraint[1], sigma, gamma)
478  rs.add_restraint(ln)
479  if verbose:
480  print "\r%d NOE restraints read" % i
481  #set weight of rs and add to model.
482  #Weight is 1.0 cause sigma particle already has this role.
483  self._m.add_restraint(rs)
484  return rs, prior_rs, sigma, gamma
485 
486  def init_model_NOEs_marginal(self, prot, seqfile, tblfile, name='NOE',
487  verbose=True, sequence_match=(1,1)):
488  """read TBL file and store NOE restraints, using the marginal of the
489  lognormal with one sigma and one gamma, for the whole dataset.
490  - prot: protein hierarchy
491  - seqfile: a file with 3-letter sequence
492  - tblfile: a TBL file with the restraints
493  - name: an optional name for the restraintset
494  - verbose: be verbose (default True)
495  - sequence_match : (noe_start, sequence_start)
496  Returns: data_rs
497  """
498  #likelihood
499  rs = IMP.RestraintSet(self._m,1.0,name)
500  #use the TBLReader to parse the TBL file.
501  sequence = IMP.isd.utils.read_sequence_file(seqfile,
502  first_residue_number=sequence_match[1])
503  tblr = IMP.isd.TBLReader.TBLReader(sequence,
504  sequence_match=sequence_match)
505  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
507  for i,restraint in enumerate(restraints):
508  if verbose and i % 100 == 0:
509  print "\r%d" % i,
510  sys.stdout.flush()
511  #a restraint is (contributions, dist, upper, lower, volume)
512  #where contributions is a tuple of contributing pairs
513  #and a pair is (c1, c2), where c1 is of the form (resno, atname)
514  #residue numbers start at 0
515  pairs=[(self.find_atom(i, prot).get_particle(),
516  self.find_atom(j, prot).get_particle()) for (i,j) in
517  restraint[0]]
518  pairs = IMP.container.ListPairContainer(pairs)
519  ln.add_contribution(pairs, restraint[4])
520  rs.add_restraint(ln)
521  if verbose:
522  print "\r%d NOE contributions added" % (len(restraints))
523  self._m.add_restraint(rs)
524  return rs
525 
526  def init_model_HBonds_marginal(self, prot, seqfile, tblfile, name='NOE',
527  verbose=True):
528  """read TBL file and store lognormal restraints, using the marginal of the
529  lognormal with one sigma and gamma=1, for the whole dataset.
530  - prot: protein hierarchy
531  - seqfile: a file with 3-letter sequence
532  - tblfile: a TBL file with the restraints
533  - name: an optional name for the restraintset
534  - verbose: be verbose (default True)
535  Returns: data_rs
536  """
537  #likelihood
538  rs = IMP.RestraintSet(self._m,1.0,name)
539  #use the TBLReader to parse the TBL file.
540  sequence = IMP.isd.utils.read_sequence_file(seqfile)
541  tblr = IMP.isd.TBLReader.TBLReader(sequence)
542  restraints = tblr.read_distances(tblfile, 'HBond')['HBond']
544  for i,restraint in enumerate(restraints):
545  if verbose and i % 100 == 0:
546  print "\r%d" % i,
547  sys.stdout.flush()
548  #a restraint is (contributions, dist, upper, lower, volume)
549  #where contributions is a tuple of contributing pairs
550  #and a pair is (c1, c2), where c1 is of the form (resno, atname)
551  #residue numbers start at 0
552  pairs=[(self.find_atom(i, prot).get_particle(),
553  self.find_atom(j, prot).get_particle()) for (i,j) in
554  restraint[0]]
555  pairs = IMP.container.ListPairContainer(pairs)
556  ln.add_contribution(pairs, restraint[4])
557  rs.add_restraint(ln)
558  if verbose:
559  print "\r%d Hbond contributions added" % (len(restraints))
560  self._m.add_restraint(rs)
561  return rs
562 
563  def init_model_TALOS(self, prot, seqfile, talos_data, fulldata=True,
564  sequence_match=(1,1),name='TALOS', prior_rs=None,
565  bounds_kappa=(1.0, 0.1,10), verbose=True, prior='jeffreys',
566  keep_all=False):
567  """read TALOS dihedral angle data, and create restraints for phi/psi
568  torsion angles, along with the prior for kappa, which is a scale for the
569  whole dataset, compare to 1/sigma**2 in the NOE case.
570  - prot: protein hierarchy
571  - seqfile: a file with 3-letter sequence
572  - talos_data: either a file (pred.tab), or a folder (pred/) in which all
573  files in the form res???.tab can be found. If possible,
574  try to provide the folder, as statistics are more
575  accurate in that case.
576  - fulldata : either True or False, whether the data is the full TALOS
577  output (predAll.tab or pred/ folder), or just the averages
578  (pred.tab)
579  - sequence_match : (talos_no, sequence_no) to adjust for different
580  residue numberings
581  - name: an optional name for the restraintset
582  - prior_rs: when not None, add new kappa(s) to this RestraintSet instance.
583  - bounds_kappa: tuple of (initial value, lower, upper bound)
584  bounds can be -1 to set to default range [0,+inf]
585  - verbose: be verbose (default True)
586  - prior: either 'jeffreys' or a tuple (R,c), which signifies to use the
587  conjugate prior of the von Mises restraint, with parameters R
588  and c. Good values are R=0 and c=10. Default: jeffreys prior.
589  - keep_all: in case of a folder for 'talos_data', whether to keep
590  candidates marked as 'outliers' by TALOS, or to include them.
591  Returns: data_rs, prior_rs, kappa
592 
593  """
594  #prior
595  if verbose:
596  print "Prior for von Mises Kappa"
597  kappa=self.init_model_setup_scale(*bounds_kappa)
598  prior_rs = self.init_model_jeffreys_kappa([kappa], prior_rs)
599  #likelihood
600  if verbose:
601  print "reading data"
602  rs = IMP.RestraintSet(self._m,1.0,name)
603  sequence= IMP.isd.utils.read_sequence_file(seqfile,
604  first_residue_no=sequence_match[1])
605  if fulldata:
606  talosr=IMP.isd.TALOSReader.TALOSReader(sequence, True, keep_all,
607  sequence_match=sequence_match)
608  if os.path.isdir(talos_data):
609  #using pred/res???.tab files
610  for i,res in enumerate(glob(os.path.join(talos_data,'res???.tab'))):
611  if verbose and i % 100:
612  print "\r%d" % i,
613  sys.stdout.flush()
614  talosr.read(res)
615  else:
616  #using predAll.tab file
617  talosr.read(talos_data)
618  else:
619  #using pred.tab file and correcting for estimates
620  talosr=IMP.isd.TALOSReader.TALOSReader(sequence, False, keep_all,
621  sequence_match=sequence_match)
622  talosr.read(talos_data)
623  #get harvested data and create restraints
624  data = talosr.get_data()
625  if verbose:
626  print "\rread dihedral data for %d residues" % len(data)
627  print "creating restraints"
628  avgR=[]
629  for resno,datum in data.iteritems():
630  phidata=datum['phi']
631  psidata=datum['psi']
632  num=datum['num']
633  res=IMP.atom.Residue(IMP.atom.get_residue(prot, resno))
636  if fulldata:
637  r=self.init_model_vonMises_restraint_full(phi, phidata, kappa)
638  rs.add_restraint(r)
639  if verbose:
640  avgR.append(r.get_R0())
641  r=self.init_model_vonMises_restraint_full(psi, psidata, kappa)
642  rs.add_restraint(r)
643  if verbose:
644  avgR.append(r.get_R0())
645  else:
646  r=self.init_model_vonMises_restraint_mean(phi, phidata, kappa)
647  rs.add_restraint(r)
648  if verbose:
649  avgR.append(r.get_R0())
650  r=self.init_model_vonMises_restraint_mean(psi, psidata, kappa)
651  rs.add_restraint(r)
652  if verbose:
653  avgR.append(r.get_R0())
654  if verbose:
655  print "%s Restraints created. Average R0: %f" % \
656  (len(avgR), sum(avgR)/float(len(avgR)))
657  self._m.add_restraint(rs)
658  return rs, prior_rs, kappa
659 
660  def init_model_standard_SAXS_restraint(self, prot, profilefile, name='SAXS',
661  ff_type=IMP.saxs.HEAVY_ATOMS):
662  """read experimental SAXS profile and apply restraint the standard
663  way (like foxs)
664  Returns: a restraintset and the experimental profile
665  """
666  rs = IMP.RestraintSet(self._m,1.0,name)
667  saxs_profile = IMP.saxs.Profile(profilefile)
668  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
669  saxs_restraint = IMP.saxs.Restraint(particles, saxs_profile, ff_type)
670  rs.add_restraint(saxs_restraint)
671  self._m.add_restraint(rs)
672  return rs, saxs_profile
673 
674  def _setup_md(self,prot, temperature=300.0, thermostat='berendsen',
675  coupling=500, md_restraints=None, timestep=1.0, recenter=1000,
676  momentum=1000):
677  """setup molecular dynamics
678  - temperature: target temperature
679  - thermostat: one of 'NVE', rescale_velocities',
680  'berendsen', 'langevin'
681  - coupling: coupling constant (tau (fs) for berendsen,
682  gamma (/fs) for langevin)
683  - md_restraints: if not None, specify the terms of the energy to be used
684  during the md steps via a list of restraints.
685  - timestep: in femtoseconds.
686  - recenter: recenter the molecule every so many steps (Langevin only)
687  - momentum: remove angular momentum every so many steps (Berendsen only)
688  Returns: an instance of md and an instance of an OptimizerState (the
689  thermostat), or None if NVE.
690  """
691  ## Molecular Dynamics (from MAX BONOMI)
693  md.set_model(self._m)
694  md.assign_velocities(temperature)
695  md.set_time_step(timestep)
696  if thermostat == 'NVE':
697  os = None
698  elif thermostat == 'rescale_velocities':
700  IMP.atom.get_leaves(prot), temperature, 0)
701  md.add_optimizer_state(os)
702  elif thermostat == 'berendsen':
704  IMP.atom.get_leaves(prot), temperature, coupling)
705  md.add_optimizer_state(os)
707  IMP.atom.get_leaves(prot), momentum)
708  md.add_optimizer_state(mom)
709  elif thermostat == 'langevin':
711  IMP.atom.get_leaves(prot), temperature, coupling)
712  md.add_optimizer_state(os)
713  #cen = IMP.atom.RemoveTranslationOptimizerState(
714  # IMP.atom.get_leaves(prot), recenter)
715  #md.add_optimizer_state(cen)
716  else:
717  raise NotImplementedError, thermostat
718 
719  if md_restraints:
720  md.set_restraints(md_restraints)
721 
722  return md, os
723 
724  def _setup_normal_mover(self, particle, floatkey, stepsize):
725  """setup NormalMover to move particle's floatkey attribute
726  by a gaussian with standard deviation 'stepsize'
727  Returns: mover instance.
728  """
729  nm=IMP.core.NormalMover(self._m,particle.get_particle_index(),
730  IMP.FloatKeys([floatkey]),stepsize)
731  return nm
732 
733  def _setup_md_mover(self, md, particles, temperature, n_md_steps=10):
734  """setup MDMover using md and particles.
735  - md: md instance
736  - particles: particles to move, usually the leaves of the protein
737  hierarchy
738  - n_md_steps: number of md steps to perform on each move
739  Returns: mover instance.
740  """
742  cont.add_particles(particles)
743  return IMP.atom.MDMover(cont, md, temperature, n_md_steps)
744 
745  def _setup_mc(self, mover, temperature=300.0, mc_restraints=None):
746  """setup monte carlo using a certain mover.
747  - mover: mover to use, NormalMover or MDMover usually.
748  - temperature: target temperature.
749  - mc_restraints: if not None, list of restraints for the metropolis
750  criterion.
751  Returns: mc instance.
752  """
753  mc = IMP.core.MonteCarlo(self._m)
754  #why is this returning an int?
755  mc.add_mover(mover)
756  #set same temp as MD, careful with units
757  mc.set_kt(kB*temperature)
758  #allow to go uphill
759  mc.set_return_best(False)
760  if mc_restraints:
761  #tell mc to only use restraints involving the particle
762  mc.set_restraints(mc_restraints)
763  return mc
764 
765  def init_simulation_setup_protein_hmc_hopper(self, prot, temperature=300.0,
766  gamma=0.01, n_md_steps=10,
767  md_restraints=None, mc_restraints=None, timestep=1.0,
768  sd_threshold=0.0, sd_stepsize=0.01, sd_maxsteps=100):
769  """setup hybrid monte-carlo on protein. Uses basin hopping with steepest
770  descent minimization.
771  - prot: protein hierarchy.
772  - temperature: target temperature.
773  - gamma: coupling constant for langevin (/fs)
774  - n_md_steps: number of md steps per mc step
775  - md_restraints: if not None, specify the terms of the energy to be used
776  during the md steps.
777  - mc_restraints: if not None, use these energy terms for the metropolis
778  criterion.
779  - timestep: time step for md, in femtoseconds.
780  - sd_threshold: stop steepest descent after energy difference drops
781  below that threshold
782  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
783  - sd_maxsteps: maximum number of steps for steepest descent
784  Returns: hmc, mdmover, md and OptimizerState (thermostat)
785  """
786  raise NotImplementedError
787  md, os = self._setup_md(prot, temperature=temperature,
788  thermostat='langevin', coupling=gamma,
789  md_restraints=md_restraints, timestep=timestep)
790  particles=IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
791  mdmover = self._setup_md_mover(md, particles, temperature, n_md_steps)
792  hmc = self._setup_mc(mdmover, temperature, mc_restraints)
793  sd=IMP.core.SteepestDescent(self._m)
794  sd.set_threshold(sd_threshold)
795  sd.set_step_size(sd_stepsize)
796  hmc.set_local_optimizer(sd)
797  hmc.set_local_steps(sd_maxsteps)
798  hmc.set_use_basin_hopping(True)
799  return hmc, mdmover, md, os
800 
801  def init_simulation_setup_protein_hmc_nve(self, prot, temperature=300.0,
802  n_md_steps=100, md_restraints=None, mc_restraints=None,
803  timestep=1.0):
804  """setup hybrid monte-carlo on protein. Uses NVE MD and tries the full
805  - prot: protein hierarchy.
806  - temperature: target temperature.
807  - coupling: coupling constant (tau (fs) for berendsen,
808  gamma (/fs) for langevin)
809  - n_md_steps: number of md steps per mc step
810  - md_restraints: if not None, specify the terms of the energy to be used
811  during the md steps.
812  - mc_restraints: if not None, use these energy terms for the metropolis
813  criterion.
814  - timestep: time step for md, in femtoseconds.
815  - sd_threshold: stop steepest descent after energy difference drops
816  below that threshold
817  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
818  - sd_maxsteps: maximum number of steps for steepest descent
819  Returns: hmc, mdmover and md
820  """
821  prot = self._p['prot']
822  md, os = self._setup_md(prot, temperature=temperature,
823  thermostat='NVE', coupling=None, timestep=1.0,
824  md_restraints=md_restraints)
825  particles=IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
827  cont.add_particles(particles)
828  mdmover = PyMDMover(cont, md, n_md_steps)
829  mdmover.m=self._m
830  #mdmover = IMP.atom.MDMover(cont, md, temperature, n_md_steps)
831  mc = PyMC(self._m)
832  #mc = IMP.core.MonteCarlo(self._m)
833  #why is this returning an int?
834  mc.add_mover(mdmover)
835  #set same temp as MD, careful with units
836  mc.set_kt(kB*temperature)
837  #allow to go uphill
838  mc.set_return_best(False)
839  #update all particles each time
840  mc.set_move_probability(1.0)
841  return mc, mdmover, md
842 
843  def init_simulation_setup_nuisance_mc(self, nuis, temperature=300.0,
844  mc_restraints=None, nm_stepsize=0.1):
845  """sets up monte carlo on nuisance, at a certain target temperature,
846  optionnally using a certain set of restraints only.
847  - nuis: nuisance particle
848  - temperature: target temperature
849  - mc_restraints: optional set of restraints from which the energy should
850  be drawn instead of the energy of the complete system.
851  - floatkey: the floatkey to move.
852  - nm_stepsize: the stepsize of the normal mover
853  Returns: mc instance, nm instance.
854  """
855  nm = self._setup_normal_mover(nuis, nuis.get_nuisance_key(),
856  nm_stepsize)
857  mc = self._setup_mc(nm, temperature, mc_restraints)
858  return mc, nm
859 
860  def _mc_and_update_nm(self, nsteps, mc, nm, stats_key,
861  adjust_stepsize=True):
862  """run mc using a normal mover on a single particle,
863  update stepsize and store nsteps, acceptance and stepsize in the
864  statistics instance self.stat by using the key stats_key.
865  """
866  #do the monte carlo
867  mc.optimize(nsteps)
868  naccept = mc.get_number_of_forward_steps()
869  #increment the counter for this simulation
870  self.stat.increment_counter(stats_key, nsteps)
871  #get the acceptance rate, stepsize
872  accept = float(naccept)/nsteps
873  self.stat.update(stats_key, 'acceptance', accept)
874  stepsize = nm.get_sigma()
875  self.stat.update(stats_key, 'stepsize', stepsize)
876  #update stepsize if needed and requested
877  if adjust_stepsize:
878  if 0.4 < accept < 0.6:
879  return
880  if accept < 0.05:
881  accept = 0.05
882  if accept > 1.0:
883  accept = 1.0
884  nm.set_sigma(stepsize*2*accept)
885 
886  def _hmc_and_update_md(self, nsteps, hmc, mv, stats_key,
887  adjust_stepsize=True):
888  """run hmc, update stepsize and print statistics. Updates number of MD
889  steps to reach a target acceptance between 0.4 and 0.6, sends
890  statistics to self.stat. MD steps are always at least 10 and at most 500.
891  """
892  hmc.optimize(nsteps)
893  naccept = hmc.get_number_of_forward_steps()
894  self.stat.increment_counter(stats_key, nsteps)
895  accept = float(naccept)/nsteps
896  self.stat.update(stats_key, 'acceptance', accept)
897  mdsteps = mv.get_number_of_steps()
898  self.stat.update(stats_key, 'n_md_steps', mdsteps)
899  if adjust_stepsize:
900  if 0.4 < accept < 0.6:
901  return
902  mdsteps = int(mdsteps *2**(accept-0.5))
903  if mdsteps > 500:
904  mdsteps = 500
905  if mdsteps < 10:
906  mdsteps = 10
907  mv.set_nsteps(mdsteps)
908 
909  def get_pdb(self, prot):
910  """returns a string corresponding to the pdb structure of hierarchy
911  prot.
912  """
913  output = StringIO()
914  IMP.atom.write_pdb(prot, output)
915  return output.getvalue()
916 
917  def get_netcdf(self, prot):
918  raise NotImplementedError
919 
920  def do_md_protein_statistics(self, md_key, nsteps, md_instance,
921  temperature=300.0, prot_coordinates=None):
922  """updates statistics for md simulation: target temp, kinetic energy,
923  kinetic temperature, writes coordinates and increments counter.
924  - md_key: stats md key
925  - nsteps: number of steps performed.
926  - md_instance: instance of the MolecularDynamics class.
927  - temperature: target temperature
928  - prot_coordinates: protein coordinates to be passed to the stats class,
929  (should be a string)
930  """
931  self.stat.update(md_key, 'target_temp', temperature)
932  kinetic = md_instance.get_kinetic_energy()
933  self.stat.update(md_key, 'E_kinetic', kinetic)
934  self.stat.update(md_key, 'instant_temp',
935  md_instance.get_kinetic_temperature(kinetic))
936  self.stat.update_coordinates(md_key, 'protein', prot_coordinates)
937  self.stat.increment_counter(md_key, nsteps)
938 
939  def init_stats_add_mc_category(self, stat, name='mc', coord='particle'):
940  """shortcut for a frequent series of operations on MC simulations'
941  statistics. Creates an entry for acceptance, stepsize and one
942  coordinate set printed in the statistics file.
943  """
944  #create category
945  mc_key = stat.add_category(name=name)
946  #giving None as argument is a way to create a static entry.
947  stat.add_entry(mc_key, entry=Entry('temperature', '%10G', None))
948  stat.add_entry(mc_key, entry=Entry('acceptance', '%10G', None))
949  stat.add_entry(mc_key, entry=Entry('stepsize', '%10G', None))
950  #special call to add coordinates to be dumped
951  stat.add_entry(mc_key, entry=Entry(coord, '%10G', None))
952  #add the counter to the output
953  stat.add_entry(mc_key, name='counter')
954  return mc_key
955 
956  def init_stats_add_md_category(self, stat, name='md', coord='protein'):
957  """shortcut for a frequent series of operations on MD simulations'
958  statistics. Creates an entry for target temp, instantaneous temp,
959  kinetic energy, and one set of coordinates called 'protein' by
960  default.
961  """
962  #create category
963  md_key = stat.add_category(name=name)
964  #giving None as argument is a way to create a static entry.
965  stat.add_entry(md_key, entry=Entry('target_temp', '%10G', None))
966  stat.add_entry(md_key, entry=Entry('instant_temp', '%10G', None))
967  stat.add_entry(md_key, entry=Entry('E_kinetic', '%10G', None))
968  #special call to add coordinates to be dumped
969  stat.add_coordinates(md_key, coord)
970  #add the counter to the output
971  stat.add_entry(md_key, name='counter')
972  return md_key
973 
974  def init_stats_add_hmc_category(self, stat, name='hmc', coord='protein'):
975  """shortcut for a frequent series of operations on HMC simulations'
976  statistics. Adds acceptance, number of MD steps and a trajectory for
977  a protein.
978  """
979  #create category
980  hmc_key = stat.add_category(name=name)
981  #giving None as argument is a way to create a static entry.
982  stat.add_entry(hmc_key, entry=Entry('temperature', '%10G', None))
983  stat.add_entry(hmc_key, entry=Entry('acceptance', '%10G', None))
984  stat.add_entry(hmc_key, entry=Entry('n_md_steps', '%10G', None))
985  stat.add_entry(hmc_key, entry=Entry('E_kinetic', '%10G', None))
986  #special call to add coordinates to be dumped
987  stat.add_coordinates(hmc_key, coord)
988  #add the counter to the output
989  stat.add_entry(hmc_key, name='counter')
990  return hmc_key
991 
992  def rescale_velocities(self, particles, factor):
993  """rescale the velocities of a bunch of particles having vx vy and vz
994  floatkeys
995  """
996  keys=[IMP.FloatKey("vx"), IMP.FloatKey("vy"), IMP.FloatKey("vz")]
997  for p in particles:
998  for k in keys:
999  p.set_value(k, p.get_value(k)*factor)
Applies a SingletonScore to each Singleton in a list.
def rescale_velocities
rescale the velocities of a bunch of particles having vx vy and vz floatkeys
CHARMMParameters * get_heavy_atom_CHARMM_parameters()
A Monte Carlo optimizer.
Definition: MonteCarlo.h:47
def init_model_ambiguous_NOE_restraint
Reads an ambiguous NOE restraint.
Maintains temperature during molecular dynamics.
Enforce CHARMM stereochemistry on the given Hierarchy.
Atoms get_phi_dihedral_atoms(Residue rd)
void write_pdb(const Selection &mhd, base::TextOutput out, unsigned int model=1)
def do_md_protein_statistics
updates statistics for md simulation: target temp, kinetic energy, kinetic temperature, writes coordinates and increments counter.
def init_model_charmm_protein_and_ff
creates a CHARMM protein representation.
def find_atom
scans the prot hierarchy and tries to find atom = (resno, name) assumes that resno follows the same n...
def init_model_NOEs
read TBL file and store NOE restraints, using one sigma and one gamma for the whole dataset...
See IMP.container for more information.
def init_simulation_setup_nuisance_mc
sets up monte carlo on nuisance, at a certain target temperature, optionnally using a certain set of ...
A decorator for a particle which has bonds.
void set_log_level(LogLevel l)
Set the current global log level.
def init_simulation_setup_protein_hmc_nve
setup hybrid monte-carlo on protein.
Calculate score based on fit to SAXS profile.
void set_check_level(CheckLevel tf)
Control runtime checks in the code.
Object used to hold a set of restraints.
A simple steepest descent optimizer.
def init_model_TALOS
read TALOS dihedral angle data, and create restraints for phi/psi torsion angles, along with the prio...
static Scale setup_particle(kernel::Model *m, ParticleIndex pi)
Definition: Scale.h:30
Ambiguous NOE distance restraint between a number of pairs of particles.
The type of an atom.
def get_pdb
returns a string corresponding to the pdb structure of hierarchy prot.
Hierarchy get_residue(Hierarchy mhd, unsigned int index)
Get the residue with the specified index.
def init_model_base
moves to wd and creates model
def init_model_NOEs_marginal
read TBL file and store NOE restraints, using the marginal of the lognormal with one sigma and one ga...
Return all close unordered pairs of particles taken from the SingletonContainer.
Classes to handle ISD statistics files.
Definition: Entry.py:1
def init_model_NOE_restraint
Sets up a lognormal distance restraint using the given sigma and gamma.
CHARMM force field parameters.
Bond create_custom_bond(Bonded a, Bonded b, Float length, Float stiffness=-1)
Connect the two wrapped particles by a custom bond.
Removes rigid translation and rotation from the particles.
def init_model_vonMises_restraint_full
Sets up a vonMises torsion angle restraint using the given kappa particle as concentration parameter...
Store a ParticleIndexPairs.
Conjugate prior for the concentration parameter of a von Mises distribution.
Maintains temperature during molecular dynamics by velocity scaling.
static Bonded setup_particle(kernel::Model *m, ParticleIndex pi)
def init_model_jeffreys_kappa
given a list of scales, returns a RestraintSet(&#39;prior&#39;) with weight 1.0 that contains a list of vonMi...
Simple molecular dynamics optimizer.
Atoms get_psi_dihedral_atoms(Residue rd)
def init_stats_add_md_category
shortcut for a frequent series of operations on MD simulations&#39; statistics.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
def init_model_HBonds_marginal
read TBL file and store lognormal restraints, using the marginal of the lognormal with one sigma and ...
Apply an NOE distance restraint between two particles.
Definition: NOERestraint.h:24
def init_stats_add_mc_category
shortcut for a frequent series of operations on MC simulations&#39; statistics.
def init_simulation_setup_protein_hmc_hopper
setup hybrid monte-carlo on protein.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:32
Class to handle individual model particles.
phi/psi dihedral restraint between four particles, using data from TALOS.
Score the bond based on a UnaryFunction,.
def read_sequence_file
read sequence of ONE chain, 1-letter or 3-letter, returns dict of no:3-letter code.
Definition: utils.py:305
Base class for all optimizers.
reads a TALOS file, or a TALOS folder, and stores the data
Definition: TALOSReader.py:11
Modify a set of continuous variables using a normal distribution.
Definition: NormalMover.h:20
nonspecific methods used across all shared function objects.
A decorator for a residue.
Definition: Residue.h:133
Apply a lognormal distance restraint between two particles.
See IMP.core for more information.
def init_model_standard_SAXS_restraint
read experimental SAXS profile and apply restraint the standard way (like foxs) Returns: a restraints...
def init_model_setup_scale
sets up a Scale particle to the initial default value.
def init_model_jeffreys
given a list of scales, returns a RestraintSet(&#39;prior&#39;) with weight 1.0 that contains a list of Jeffr...
A filter that excludes bonds, angles and dihedrals.
def init_model_vonMises_restraint_mean
Sets up a vonMises torsion angle restraint using the given kappa particle as concentration parameter...
def init_stats_add_hmc_category
shortcut for a frequent series of operations on HMC simulations&#39; statistics.
def m
wrapper to call methods of m
Apply an NOE distance restraint between two particles.
static bool particle_is_instance(::IMP::kernel::Particle *p)
See IMP.atom for more information.
void read_pdb(base::TextInput input, int model, Hierarchy h)
CHARMMParameters * get_all_atom_CHARMM_parameters()
Miscellaneous utilities.
Definition: utils.py:1
Hierarchies get_leaves(const Selection &h)
Classes to handle TALOS files or folders.
Definition: TALOSReader.py:1
Applies a PairScore to each Pair in a list.
See IMP.saxs for more information.
Definition: ChiFreeScore.h:14
def init_model_conjugate_kappa
given a list of scales, returns a RestraintSet(&#39;prior&#39;) with weight 1.0 that contains a list of vonMi...
Class for storing model, its restraints, constraints, and particles.
See IMP.isd for more information.
Classes to handle TBL files.
Definition: TBLReader.py:1
Harmonic function (symmetric about the mean)
Definition: core/Harmonic.h:25
Maintains temperature during molecular dynamics.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27