IMP  2.0.0
The Integrative Modeling Platform
shared_functions.py
1 #!/usr/bin/env python
2 import sys,os
3 from glob import glob
4 
5 import IMP
6 import IMP.atom
7 import IMP.container
8 import IMP.saxs
9 import IMP.core
10 import IMP.isd
11 import IMP.isd.TBLReader
13 import IMP.isd.utils
14 from IMP.isd.Entry import Entry
15 
16 from math import sqrt,exp
17 from StringIO import StringIO
18 
19 from random import random
20 
21 kB= (1.381 * 6.02214) / 4184.0
22 
23 class PyMDMover:
24  def __init__(self, cont, md, n_md_steps):
25  self.cont = cont
26  self.md = md
27  self.n_md_steps = n_md_steps
28  self.velkeys=[IMP.FloatKey("vx"),
29  IMP.FloatKey("vy"), IMP.FloatKey("vz")]
30 
31  def store_move(self):
32  self.oldcoords=[]
33  #self.oldvel=[]
34  for i in xrange(self.cont.get_number_of_particles()):
35  p=self.cont.get_particle(i)
36  #self.oldvel.append([p.get_value(k) for k in self.velkeys])
37  d=IMP.core.XYZ(p)
38  self.oldcoords.append(d.get_coordinates())
39 
40  def propose_move(self, prob):
41  self.md.optimize(self.n_md_steps)
42 
43  def reset_move(self):
44  for i in xrange(self.cont.get_number_of_particles()):
45  p=self.cont.get_particle(i)
46  #for j,k in enumerate(self.velkeys):
47  # p.set_value(k, self.oldvel[i][j])
48  d=IMP.core.XYZ(p)
49  d.set_coordinates(self.oldcoords[i])
50 
51  def get_number_of_steps(self):
52  return self.n_md_steps
53 
54  def set_number_of_steps(self, nsteps):
55  self.n_md_steps = nsteps
56 
57 class PyMC(IMP.Optimizer):
58  debug =True
59 
60  def __init__(self,model):
61  self.m=model
62 
63  def add_mover(self,mv):
64  self.mv = mv
65 
66  def set_kt(self,kT):
67  self.kT=kT
68 
69  def set_return_best(self,thing):
70  pass
71  def set_move_probability(self,thing):
72  pass
73 
74  def get_energy(self):
75  pot=self.m.evaluate(False)
76  #pot=self.get_restraints().evaluate(False)
77  kin=self.mv.md.get_kinetic_energy()
78  return pot+kin
79 
80  def metropolis(self, old, new):
81  deltaE=new-old
82  if self.debug:
83  print ": old %f new %f deltaE %f new_epot: %f" % (old,new,deltaE,
84  self.m.evaluate(False)),
85  kT=self.kT
86  if deltaE < 0:
87  return True
88  else:
89  return exp(-deltaE/kT) > random()
90 
91  def optimize(self,nsteps):
92  self.naccept = 0
93  print "=== new MC call"
94  #store initial coordinates
95  self.mv.store_move()
96  for i in xrange(nsteps):
97  print "MC step %d " % i,
98  #draw new velocities
99  self.mv.md.assign_velocities(self.kT / kB)
100  #get total energy
101  old=self.get_energy()
102  #make a MD move
103  self.mv.propose_move(1)
104  #get new total energy
105  new=self.get_energy()
106  if self.metropolis(old,new):
107  #move was accepted: store new conformation
108  self.mv.store_move()
109  self.naccept += 1
110  print "accepted "
111  else:
112  #move rejected: restore old conformation
113  self.mv.reset_move()
114  print " "
115 
116  def get_number_of_forward_steps(self):
117  return self.naccept
118 
119 class sfo_common:
120  """nonspecific methods used across all shared function objects.
121  Rules:
122  - Their name starts with the name of the parent function (e.g.
123  init_model_* )
124  - they don't store anything in the class, but instead
125  return all created objects.
126  Exceptions: the model, which is self._m
127  the statistics class, which is self.stat
128  - they store what they have to store in the model (e.g. restraints)
129  - they don't print out anything except for long routines (e.g. NOE
130  parsing)
131  - the prior RestraintSet is added to the model when it is created, so
132  that when it is passed to another function, it is not added twice.
133  """
134 
135  def hello(self):
136  return "hello world"
137 
138  def set_checklevel(self,value):
140 
141  def set_loglevel(self,value):
143 
144  def m(self,name,*args,**kw):
145  "wrapper to call methods of m"
146  func=getattr(self._m,name)
147  return func(*args, **kw)
148 
149  def init_model_base(self, wd):
150  "moves to wd and creates model"
151  os.chdir(wd)
152  # Create an IMP model
153  self._m = IMP.Model()
154 
155  def init_model_charmm_protein_and_ff(self, initpdb, top, par, selector, pairscore,
156  ff_temp=300.0, disulfides=None, representation='custom'):
157  """creates a CHARMM protein representation.
158  creates the charmm force field, bonded and nonbonded.
159  - initpdb: initial structure in pdb format
160  - top is a CHARMM top.lib, read if representation=='custom' (default)
161  - par is a CHARMM par.lib
162  - selector is an instance of
163  one of the selectors of IMP.atom, for example
164  IMP.atom.NonWaterNonHydrogenPDBSelector().
165  - pairscore is an instance of a Pair Score to score the interaction
166  between two atoms. usually, it's either
167  LennardJonesDistancePairScore(0,1) or
168  RepulsiveDistancePairScore(0,1)
169  - ff_temp is the temperature at which the force field should be
170  simulated.
171  - disulfides: if not None, a list of tuples corresponding to residue
172  numbers that should be cross-linked. Residues should be
173  cysteines, and residue numbering should start at 0.
174  - representation: 'full' : all-atom CHARMM force field
175  'heavy': heavy atom CHARMM forcefield with polar H
176  'calpha': only C alphas, ball and stick model with
177  bondlength 3.78 angstrom, beads at VdW
178  contact, and harmonic restraint between
179  them.
180  'custom' (default) : read given CHARMM top and par
181  files.
182  Returns: prot, ff, rsb, rs
183  - prot: the protein
184  - ff: the force field
185  - rsb: the RestraintSet on bonded interactions
186  - rs: the RestraintSet on nonbonded interactions. Both are weighted
187  using ff_temp.
188  """
189  m=self._m
190  prot = IMP.atom.read_pdb(initpdb, m, selector)
191  if not prot.get_is_valid(True):
192  raise ValueError, "invalid hierarchy!"
193  if representation == 'custom':
194  # Read in the CHARMM heavy atom topology and parameter files
195  ff = IMP.atom.CHARMMParameters(top,par)
196  elif representation == 'heavy':
198  elif representation == 'full':
200  elif representation == 'calpha':
201  pass
202  else:
203  raise NotImplementedError, representation
204  if representation == 'calpha':
205  print "setting up simplified C alpha force field"
206  # set radii
207  for ca in IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE):
208  IMP.core.XYZR(ca.get_particle()).set_radius(1.89)
209  #IMP.atom.Mass(ca.get_particle()).set_mass(1.)
210  #create bonds by getting pairs and applying a pairscore
211  pairs=[]
212  for chain in prot.get_children():
213  residues=[(chain.get_child(i),chain.get_child(i+1))
214  for i in xrange(chain.get_number_of_children()-1)]
215  residues=[(i.get_child(0).get_particle(),
216  j.get_child(0).get_particle())
217  for (i,j) in residues]
218  pairs.extend(residues)
219  bonds=[]
220  for (i,j) in pairs:
221  #check, because it fails if you try twice
223  bi = IMP.atom.Bonded(i)
224  else:
227  bi = IMP.atom.Bonded(i)
228  else:
230  bond=IMP.atom.create_custom_bond(bi,bj,3.78,10) #stiff
231  bonds.append(bond)
232  bonds_container = IMP.container.ListSingletonContainer(bonds)
233  hdps = IMP.core.Harmonic(0,1)
234  bs = IMP.atom.BondSingletonScore(hdps)
235  br = IMP.container.SingletonsRestraint(bs, bonds_container)
236  rsb = IMP.RestraintSet("bonded")
237  rsb.add_restraint(br)
238  rsb.set_weight(1.0/(kB*ff_temp))
239  m.add_restraint(rsb)
240  nonbonded_pair_filter = IMP.atom.StereochemistryPairFilter()
241  nonbonded_pair_filter.set_bonds(bonds)
242  ff=None
243  else:
244  print "setting up CHARMM forcefield"
245  #
246  # Using the CHARMM libraries, determine the ideal topology (atoms and their
247  # connectivity) for the PDB file's primary sequence
248  topology = ff.create_topology(prot)
249  # Typically this modifies the C and N termini of each chain in the protein by
250  # applying the CHARMM CTER and NTER patches. Patches can also be manually
251  # applied at this point, e.g. to add disulfide bridges.
252  topology.apply_default_patches()
253  #disulfides
254  if disulfides:
255  s=topology.get_segment(0)
256  dis=ff.get_patch('DISU')
257  for (i,j) in disulfides:
258  self.find_atom((i, 'SG'), prot)
259  self.find_atom((j, 'SG'), prot)
260  r0=s.get_residue(i)
261  r1=s.get_residue(j)
262  if i==0:
263  r0.set_patched(False)
264  if j==0:
265  r1.set_patched(False)
266  dis.apply(r0,r1)
267  print "added disulfide bridge between cysteines %d and %d" % (i,j)
268  # Make the PDB file conform with the topology; i.e. if it contains extra
269  # atoms that are not in the CHARMM topology file, remove them; if it is
270  # missing atoms (e.g. sidechains, hydrogens) that are in the CHARMM topology,
271  # add them and construct their Cartesian coordinates from internal coordinate
272  # information.
273  topology.setup_hierarchy(prot)
274  # Set up and evaluate the stereochemical part (bonds, angles, dihedrals,
275  # impropers) of the CHARMM forcefield
276  r = IMP.atom.CHARMMStereochemistryRestraint(prot, topology)
277  rsb = IMP.RestraintSet("bonded")
278  rsb.add_restraint(r)
279  m.add_restraint(rsb)
280  #
281  # Add non-bonded interaction (in this case, Lennard-Jones). This needs to
282  # know the radii and well depths for each atom, so add them from the forcefield
283  # (they can also be assigned manually using the XYZR or LennardJones
284  # decorators):
285  ff.add_radii(prot)
286  ff.add_well_depths(prot)
287  nonbonded_pair_filter = r.get_pair_filter()
288  # Get a list of all atoms in the protein, and put it in a container
289  atoms = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
291  # Add a restraint for the Lennard-Jones interaction. This is built from
292  # a collection of building blocks. First, a ClosePairContainer maintains a list
293  # of all pairs of Particles that are close. Next, all 1-2, 1-3 and 1-4 pairs
294  # from the stereochemistry created above are filtered out.
295  # Then, a LennardJonesPairScore scores a pair of atoms with the Lennard-Jones
296  # potential. Finally, a PairsRestraint is used which simply applies the
297  # LennardJonesPairScore to each pair in the ClosePairContainer.
298  nbl = IMP.container.ClosePairContainer(cont, 4.0)
299  nbl.add_pair_filter(nonbonded_pair_filter)
300  #should weight the ff restraint by a temperature, set to 300K.
301  pr = IMP.container.PairsRestraint(pairscore, nbl)
302  rs = IMP.RestraintSet('phys')
303  rs.add_restraint(pr)
304  rs.set_weight(1.0/(kB*ff_temp))
305  m.add_restraint(rs)
306  return prot, ff, rsb, rs
307 
308  def init_model_setup_scale(self, default, lower=-1, upper=-1):
309  """sets up a Scale particle to the initial default value. It can
310  optionnally be constrained between two positive bounds, or else its
311  range is 0 to infinity.
312  """
313  m=self._m
314  scale = IMP.isd.Scale.setup_particle(IMP.Particle(m),default,lower,upper)
315  #constrain it also for the optimizers
316  if lower != -1 or upper != -1:
317  m.add_score_state(IMP.core.SingletonConstraint(
318  IMP.isd.ScaleRangeModifier(),None,scale))
319  return scale
320 
321  def init_model_jeffreys_kappa(self, scales, prior_rs=None):
322  """given a list of scales, returns a RestraintSet('prior') with weight
323  1.0 that contains a list of vonMisesKappaJeffreysRestraint on each scale.
324  If argument prior_rs is used, add them to that RestraintSet instead.
325  """
326  if not prior_rs:
327  prior_rs = IMP.RestraintSet('prior')
328  self._m.add_restraint(prior_rs)
329  prior_rs.set_weight(1.0)
330  for i in scales:
331  prior_rs.add_restraint(IMP.isd.vonMisesKappaJeffreysRestraint(i))
332  return prior_rs
333 
334  def init_model_jeffreys(self, scales, prior_rs=None):
335  """given a list of scales, returns a RestraintSet('prior') with weight
336  1.0 that contains a list of JeffreysRestraint on each scale.
337  If argument prior_rs is used, add them to that RestraintSet instead.
338  """
339  if not prior_rs:
340  prior_rs = IMP.RestraintSet('prior')
341  self._m.add_restraint(prior_rs)
342  prior_rs.set_weight(1.0)
343  for i in scales:
344  prior_rs.add_restraint(IMP.isd.JeffreysRestraint(i))
345  return prior_rs
346 
347  def init_model_conjugate_kappa(self, scales, c, R, prior_rs=None):
348  """given a list of scales, returns a RestraintSet('prior') with weight
349  1.0 that contains a list of vonMisesKappaConjugateRestraint on each
350  scale. If argument prior_rs is used, add them to that RestraintSet
351  instead.
352  """
353  if not (0 <= R <= c):
354  raise ValueError, "parameters R and c should satisfy 0 <= R <= c"
355  if not prior_rs:
356  prior_rs = IMP.RestraintSet('prior')
357  self._m.add_restraint(prior_rs)
358  prior_rs.set_weight(1.0)
359  for i in scales:
360  prior_rs.add_restraint(IMP.isd.vonMisesKappaConjugateRestraint(i, c, R))
361  return prior_rs
362 
363  def find_atom(self, atom, prot):
364  """scans the prot hierarchy and tries to find atom = (resno, name)
365  assumes that resno follows the same numbering as the sequence.
366  Stores already found atoms for increased speed.
367  """
368  if not hasattr(self,'__memoized'):
369  self.__memoized={prot:{}}
370  try:
371  return self.__memoized[prot][atom]
372  except:
373  pass
374  try:
375  sel=IMP.atom.Selection(hierarchy=prot,
376  residue_index=atom[0],
377  atom_type=IMP.atom.AtomType(atom[1])
378  ).get_selected_particles()
379  if len(sel) > 1:
380  print "found multiple atoms for atom %d %s!" % atom
381  return
382  p0=IMP.core.XYZ(sel[0])
383  except:
384  print "atom %d %s not found" % atom
385  return
386  self.__memoized[prot][atom] = p0
387  return p0
388 
389  def init_model_vonMises_restraint_full(self, atoms, data, kappa):
390  """
391  Sets up a vonMises torsion angle restraint using the given kappa
392  particle as concentration parameter. Returns the restraint.
393  data is a list of observations.
394  """
395  return IMP.isd.TALOSRestraint(atoms,data,kappa)
396 
397  def init_model_vonMises_restraint_mean(self, prot, atoms, data, kappa):
398  """
399  Sets up a vonMises torsion angle restraint using the given kappa
400  particle as concentration parameter. Returns the restraint.
401  data is (mean, standard deviation).
402  """
403  raise NotImplementedError
404 
405  def init_model_NOE_restraint(self, prot, atoms, distance, sigma, gamma):
406  """
407  Sets up a lognormal distance restraint using the given sigma and gamma.
408  Returns the restraint.
409  assumes atoms = (atom1, atom2)
410  where atom1 is (resno, atomname) and resno is the residue sequence
411  number.
412  """
413  #find corresponding atoms
414  p0=self.find_atom(atoms[0], prot)
415  p1=self.find_atom(atoms[1], prot)
416  #create lognormal restraint using gamma_data = 1
417  ln = IMP.isd.NOERestraint(p0,p1,sigma,gamma,distance**(-6))
418  return ln
419 
420  def init_model_ambiguous_NOE_restraint(self, prot, contributions, distance,
421  sigma, gamma):
422  """Reads an ambiguous NOE restraint. contributions is a list of
423  (atom1, atom2) pairs, where atom1 is (resno, atomname). Sets up a
424  lognormal distance restraint using the given sigma and gamma.
425  Returns the restraint.
426  """
427  #create pairs list
428  pairs=[(self.find_atom(i, prot).get_particle(),
429  self.find_atom(j, prot).get_particle()) for (i,j) in
430  contributions]
431  pairs = IMP.container.ListPairContainer(pairs)
432  #create restraint
433  ln = IMP.isd.AmbiguousNOERestraint(pairs, sigma, gamma, distance**(-6))
434  return ln
435 
436  def init_model_NOEs(self, prot, seqfile, tblfile, name='NOE', prior_rs=None,
437  bounds_sigma=(1.0,0.1,100), bounds_gamma=(1.0,0.1,100),
438  verbose=True, sequence_match=(1,1)):
439  """read TBL file and store NOE restraints, using one sigma and one gamma
440  for the whole dataset. Creates the necessary uninformative priors.
441  - prot: protein hierarchy
442  - seqfile: a file with 3-letter sequence
443  - tblfile: a TBL file with the restraints
444  - name: an optional name for the restraintset
445  - prior_rs: when not None, add new sigma and gamma to this
446  RestraintSet instance.
447  - bounds_sigma or gamma: tuple of (initial value, lower, upper bound)
448  bounds can be -1 to set to default range [0,+inf]
449  - verbose: be verbose (default True)
450  - sequence_match : (noe_start, sequence_start)
451  Returns: data_rs, prior_rs, sigma, gamma
452  """
453  #prior
454  if verbose:
455  print "Prior for the NOE Scales"
456  sigma=self.init_model_setup_scale(*bounds_sigma)
457  gamma=self.init_model_setup_scale(*bounds_gamma)
458  prior_rs = self.init_model_jeffreys([sigma,gamma], prior_rs)
459  #likelihood
460  rs = IMP.RestraintSet(name)
461  #use the TBLReader to parse the TBL file.
462  sequence = IMP.isd.utils.read_sequence_file(seqfile,
463  first_residue_number=sequence_match[1])
464  tblr = IMP.isd.TBLReader.TBLReader(sequence,
465  sequence_match=sequence_match)
466  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
467  for i,restraint in enumerate(restraints):
468  if verbose and i % 100 == 0:
469  print "\r%d" % i,
470  sys.stdout.flush()
471  #a restraint is (contributions, dist, upper, lower, volume)
472  #where contributions is a tuple of contributing pairs
473  #and a pair is (c1, c2), where c1 is of the form (resno, atname)
474  if len(restraint[0]) > 1:
475  ln = self.init_model_ambiguous_NOE_restraint(prot, restraint[0],
476  restraint[1], sigma, gamma)
477  else:
478  ln = self.init_model_NOE_restraint(prot, restraint[0][0],
479  restraint[1], sigma, gamma)
480  rs.add_restraint(ln)
481  if verbose:
482  print "\r%d NOE restraints read" % i
483  #set weight of rs and add to model.
484  #Weight is 1.0 cause sigma particle already has this role.
485  rs.set_weight(1.0)
486  self._m.add_restraint(rs)
487  return rs, prior_rs, sigma, gamma
488 
489  def init_model_NOEs_marginal(self, prot, seqfile, tblfile, name='NOE',
490  verbose=True, sequence_match=(1,1)):
491  """read TBL file and store NOE restraints, using the marginal of the
492  lognormal with one sigma and one gamma, for the whole dataset.
493  - prot: protein hierarchy
494  - seqfile: a file with 3-letter sequence
495  - tblfile: a TBL file with the restraints
496  - name: an optional name for the restraintset
497  - verbose: be verbose (default True)
498  - sequence_match : (noe_start, sequence_start)
499  Returns: data_rs
500  """
501  #likelihood
502  rs = IMP.RestraintSet(name)
503  #use the TBLReader to parse the TBL file.
504  sequence = IMP.isd.utils.read_sequence_file(seqfile,
505  first_residue_number=sequence_match[1])
506  tblr = IMP.isd.TBLReader.TBLReader(sequence,
507  sequence_match=sequence_match)
508  restraints = tblr.read_distances(tblfile, 'NOE')['NOE']
510  for i,restraint in enumerate(restraints):
511  if verbose and i % 100 == 0:
512  print "\r%d" % i,
513  sys.stdout.flush()
514  #a restraint is (contributions, dist, upper, lower, volume)
515  #where contributions is a tuple of contributing pairs
516  #and a pair is (c1, c2), where c1 is of the form (resno, atname)
517  #residue numbers start at 0
518  pairs=[(self.find_atom(i, prot).get_particle(),
519  self.find_atom(j, prot).get_particle()) for (i,j) in
520  restraint[0]]
521  pairs = IMP.container.ListPairContainer(pairs)
522  ln.add_contribution(pairs, restraint[4])
523  rs.add_restraint(ln)
524  if verbose:
525  print "\r%d NOE contributions added" % (len(restraints))
526  rs.set_weight(1.0)
527  self._m.add_restraint(rs)
528  return rs
529 
530  def init_model_HBonds_marginal(self, prot, seqfile, tblfile, name='NOE',
531  verbose=True):
532  """read TBL file and store lognormal restraints, using the marginal of the
533  lognormal with one sigma and gamma=1, for the whole dataset.
534  - prot: protein hierarchy
535  - seqfile: a file with 3-letter sequence
536  - tblfile: a TBL file with the restraints
537  - name: an optional name for the restraintset
538  - verbose: be verbose (default True)
539  Returns: data_rs
540  """
541  #likelihood
542  rs = IMP.RestraintSet(name)
543  #use the TBLReader to parse the TBL file.
544  sequence = IMP.isd.utils.read_sequence_file(seqfile)
545  tblr = IMP.isd.TBLReader.TBLReader(sequence)
546  restraints = tblr.read_distances(tblfile, 'HBond')['HBond']
548  for i,restraint in enumerate(restraints):
549  if verbose and i % 100 == 0:
550  print "\r%d" % i,
551  sys.stdout.flush()
552  #a restraint is (contributions, dist, upper, lower, volume)
553  #where contributions is a tuple of contributing pairs
554  #and a pair is (c1, c2), where c1 is of the form (resno, atname)
555  #residue numbers start at 0
556  pairs=[(self.find_atom(i, prot).get_particle(),
557  self.find_atom(j, prot).get_particle()) for (i,j) in
558  restraint[0]]
559  pairs = IMP.container.ListPairContainer(pairs)
560  ln.add_contribution(pairs, restraint[4])
561  rs.add_restraint(ln)
562  if verbose:
563  print "\r%d Hbond contributions added" % (len(restraints))
564  rs.set_weight(1.0)
565  self._m.add_restraint(rs)
566  return rs
567 
568  def init_model_TALOS(self, prot, seqfile, talos_data, fulldata=True,
569  sequence_match=(1,1),name='TALOS', prior_rs=None,
570  bounds_kappa=(1.0, 0.1,10), verbose=True, prior='jeffreys',
571  keep_all=False):
572  """read TALOS dihedral angle data, and create restraints for phi/psi
573  torsion angles, along with the prior for kappa, which is a scale for the
574  whole dataset, compare to 1/sigma**2 in the NOE case.
575  - prot: protein hierarchy
576  - seqfile: a file with 3-letter sequence
577  - talos_data: either a file (pred.tab), or a folder (pred/) in which all
578  files in the form res???.tab can be found. If possible,
579  try to provide the folder, as statistics are more
580  accurate in that case.
581  - fulldata : either True or False, whether the data is the full TALOS
582  output (predAll.tab or pred/ folder), or just the averages
583  (pred.tab)
584  - sequence_match : (talos_no, sequence_no) to adjust for different
585  residue numberings
586  - name: an optional name for the restraintset
587  - prior_rs: when not None, add new kappa(s) to this RestraintSet instance.
588  - bounds_kappa: tuple of (initial value, lower, upper bound)
589  bounds can be -1 to set to default range [0,+inf]
590  - verbose: be verbose (default True)
591  - prior: either 'jeffreys' or a tuple (R,c), which signifies to use the
592  conjugate prior of the von Mises restraint, with parameters R
593  and c. Good values are R=0 and c=10. Default: jeffreys prior.
594  - keep_all: in case of a folder for 'talos_data', whether to keep
595  candidates marked as 'outliers' by TALOS, or to include them.
596  Returns: data_rs, prior_rs, kappa
597 
598  """
599  #prior
600  if verbose:
601  print "Prior for von Mises Kappa"
602  kappa=self.init_model_setup_scale(*bounds_kappa)
603  prior_rs = self.init_model_jeffreys_kappa([kappa], prior_rs)
604  #likelihood
605  if verbose:
606  print "reading data"
607  rs=IMP.RestraintSet(name)
608  sequence= IMP.isd.utils.read_sequence_file(seqfile,
609  first_residue_no=sequence_match[1])
610  if fulldata:
611  talosr=IMP.isd.TALOSReader.TALOSReader(sequence, True, keep_all,
612  sequence_match=sequence_match)
613  if os.path.isdir(talos_data):
614  #using pred/res???.tab files
615  for i,res in enumerate(glob(os.path.join(talos_data,'res???.tab'))):
616  if verbose and i % 100:
617  print "\r%d" % i,
618  sys.stdout.flush()
619  talosr.read(res)
620  else:
621  #using predAll.tab file
622  talosr.read(talos_data)
623  else:
624  #using pred.tab file and correcting for estimates
625  talosr=IMP.isd.TALOSReader.TALOSReader(sequence, False, keep_all,
626  sequence_match=sequence_match)
627  talosr.read(talos_data)
628  #get harvested data and create restraints
629  data = talosr.get_data()
630  if verbose:
631  print "\rread dihedral data for %d residues" % len(data)
632  print "creating restraints"
633  avgR=[]
634  for resno,datum in data.iteritems():
635  phidata=datum['phi']
636  psidata=datum['psi']
637  num=datum['num']
638  res=IMP.atom.Residue(IMP.atom.get_residue(prot, resno))
641  if fulldata:
642  r=self.init_model_vonMises_restraint_full(phi, phidata, kappa)
643  rs.add_restraint(r)
644  if verbose:
645  avgR.append(r.get_R0())
646  r=self.init_model_vonMises_restraint_full(psi, psidata, kappa)
647  rs.add_restraint(r)
648  if verbose:
649  avgR.append(r.get_R0())
650  else:
651  r=self.init_model_vonMises_restraint_mean(phi, phidata, kappa)
652  rs.add_restraint(r)
653  if verbose:
654  avgR.append(r.get_R0())
655  r=self.init_model_vonMises_restraint_mean(psi, psidata, kappa)
656  rs.add_restraint(r)
657  if verbose:
658  avgR.append(r.get_R0())
659  if verbose:
660  print "%s Restraints created. Average R0: %f" % \
661  (len(avgR), sum(avgR)/float(len(avgR)))
662  rs.set_weight(1.0)
663  self._m.add_restraint(rs)
664  return rs, prior_rs, kappa
665 
666  def init_model_standard_SAXS_restraint(self, prot, profilefile, name='SAXS',
667  ff_type=IMP.saxs.HEAVY_ATOMS):
668  """read experimental SAXS profile and apply restraint the standard
669  way (like foxs)
670  Returns: a restraintset and the experimental profile
671  """
672  rs = IMP.RestraintSet(name)
673  saxs_profile = IMP.saxs.Profile(profilefile)
674  particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
675  saxs_restraint = IMP.saxs.Restraint(particles, saxs_profile, ff_type)
676  rs.add_restraint(saxs_restraint)
677  rs.set_weight(1.0)
678  self._m.add_restraint(rs)
679  return rs, saxs_profile
680 
681  def _setup_md(self,prot, temperature=300.0, thermostat='berendsen',
682  coupling=500, md_restraints=None, timestep=1.0, recenter=1000,
683  momentum=1000):
684  """setup molecular dynamics
685  - temperature: target temperature
686  - thermostat: one of 'NVE', rescale_velocities',
687  'berendsen', 'langevin'
688  - coupling: coupling constant (tau (fs) for berendsen,
689  gamma (/fs) for langevin)
690  - md_restraints: if not None, specify the terms of the energy to be used
691  during the md steps via a list of restraints.
692  - timestep: in femtoseconds.
693  - recenter: recenter the molecule every so many steps (Langevin only)
694  - momentum: remove angular momentum every so many steps (Berendsen only)
695  Returns: an instance of md and an instance of an OptimizerState (the
696  thermostat), or None if NVE.
697  """
698  ## Molecular Dynamics (from MAX BONOMI)
700  md.set_model(self._m)
701  md.assign_velocities(temperature)
702  md.set_time_step(timestep)
703  if thermostat == 'NVE':
704  os = None
705  elif thermostat == 'rescale_velocities':
707  IMP.atom.get_leaves(prot), temperature, 0)
708  md.add_optimizer_state(os)
709  elif thermostat == 'berendsen':
711  IMP.atom.get_leaves(prot), temperature, coupling)
712  md.add_optimizer_state(os)
714  IMP.atom.get_leaves(prot), momentum)
715  md.add_optimizer_state(mom)
716  elif thermostat == 'langevin':
718  IMP.atom.get_leaves(prot), temperature, coupling)
719  md.add_optimizer_state(os)
720  #cen = IMP.atom.RemoveTranslationOptimizerState(
721  # IMP.atom.get_leaves(prot), recenter)
722  #md.add_optimizer_state(cen)
723  else:
724  raise NotImplementedError, thermostat
725 
726  if md_restraints:
727  md.set_restraints(md_restraints)
728 
729  return md, os
730 
731  def _setup_normal_mover(self, particle, floatkey, stepsize):
732  """setup NormalMover to move particle's floatkey attribute
733  by a gaussian with standard deviation 'stepsize'
734  Returns: mover instance.
735  """
737  cont.add_particle(particle)
738  nm=IMP.core.NormalMover(cont,
739  IMP.FloatKeys([floatkey]),stepsize)
740  return nm
741 
742  def _setup_md_mover(self, md, particles, temperature, n_md_steps=10):
743  """setup MDMover using md and particles.
744  - md: md instance
745  - particles: particles to move, usually the leaves of the protein
746  hierarchy
747  - n_md_steps: number of md steps to perform on each move
748  Returns: mover instance.
749  """
751  cont.add_particles(particles)
752  return IMP.atom.MDMover(cont, md, temperature, n_md_steps)
753 
754  def _setup_mc(self, mover, temperature=300.0, mc_restraints=None):
755  """setup monte carlo using a certain mover.
756  - mover: mover to use, NormalMover or MDMover usually.
757  - temperature: target temperature.
758  - mc_restraints: if not None, list of restraints for the metropolis
759  criterion.
760  Returns: mc instance.
761  """
762  mc = IMP.core.MonteCarlo(self._m)
763  #why is this returning an int?
764  mc.add_mover(mover)
765  #set same temp as MD, careful with units
766  mc.set_kt(kB*temperature)
767  #allow to go uphill
768  mc.set_return_best(False)
769  #update all particles each time
770  mc.set_move_probability(1.0)
771  if mc_restraints:
772  #tell mc to only use restraints involving the particle
773  mc.set_restraints(mc_restraints)
774  return mc
775 
776  def init_simulation_setup_protein_hmc_hopper(self, prot, temperature=300.0,
777  gamma=0.01, n_md_steps=10,
778  md_restraints=None, mc_restraints=None, timestep=1.0,
779  sd_threshold=0.0, sd_stepsize=0.01, sd_maxsteps=100):
780  """setup hybrid monte-carlo on protein. Uses basin hopping with steepest
781  descent minimization.
782  - prot: protein hierarchy.
783  - temperature: target temperature.
784  - gamma: coupling constant for langevin (/fs)
785  - n_md_steps: number of md steps per mc step
786  - md_restraints: if not None, specify the terms of the energy to be used
787  during the md steps.
788  - mc_restraints: if not None, use these energy terms for the metropolis
789  criterion.
790  - timestep: time step for md, in femtoseconds.
791  - sd_threshold: stop steepest descent after energy difference drops
792  below that threshold
793  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
794  - sd_maxsteps: maximum number of steps for steepest descent
795  Returns: hmc, mdmover, md and OptimizerState (thermostat)
796  """
797  raise NotImplementedError
798  md, os = self._setup_md(prot, temperature=temperature,
799  thermostat='langevin', coupling=gamma,
800  md_restraints=md_restraints, timestep=timestep)
801  particles=IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
802  mdmover = self._setup_md_mover(md, particles, temperature, n_md_steps)
803  hmc = self._setup_mc(mdmover, temperature, mc_restraints)
804  sd=IMP.core.SteepestDescent(self._m)
805  sd.set_threshold(sd_threshold)
806  sd.set_step_size(sd_stepsize)
807  hmc.set_local_optimizer(sd)
808  hmc.set_local_steps(sd_maxsteps)
809  hmc.set_use_basin_hopping(True)
810  return hmc, mdmover, md, os
811 
812  def init_simulation_setup_protein_hmc_nve(self, prot, temperature=300.0,
813  n_md_steps=100, md_restraints=None, mc_restraints=None,
814  timestep=1.0):
815  """setup hybrid monte-carlo on protein. Uses NVE MD and tries the full
816  - prot: protein hierarchy.
817  - temperature: target temperature.
818  - coupling: coupling constant (tau (fs) for berendsen,
819  gamma (/fs) for langevin)
820  - n_md_steps: number of md steps per mc step
821  - md_restraints: if not None, specify the terms of the energy to be used
822  during the md steps.
823  - mc_restraints: if not None, use these energy terms for the metropolis
824  criterion.
825  - timestep: time step for md, in femtoseconds.
826  - sd_threshold: stop steepest descent after energy difference drops
827  below that threshold
828  - sd_stepsize: stepsize to use for the steepest descent, in angstrom.
829  - sd_maxsteps: maximum number of steps for steepest descent
830  Returns: hmc, mdmover and md
831  """
832  prot = self._p['prot']
833  md, os = self._setup_md(prot, temperature=temperature,
834  thermostat='NVE', coupling=None, timestep=1.0,
835  md_restraints=md_restraints)
836  particles=IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
838  cont.add_particles(particles)
839  mdmover = PyMDMover(cont, md, n_md_steps)
840  mdmover.m=self._m
841  #mdmover = IMP.atom.MDMover(cont, md, temperature, n_md_steps)
842  mc = PyMC(self._m)
843  #mc = IMP.core.MonteCarlo(self._m)
844  #why is this returning an int?
845  mc.add_mover(mdmover)
846  #set same temp as MD, careful with units
847  mc.set_kt(kB*temperature)
848  #allow to go uphill
849  mc.set_return_best(False)
850  #update all particles each time
851  mc.set_move_probability(1.0)
852  return mc, mdmover, md
853 
854  def init_simulation_setup_scale_mc(self, scale, temperature=300.0,
855  mc_restraints=None, floatkey=IMP.FloatKey("scale"), nm_stepsize=0.1):
856  """sets up monte carlo on scale, at a certain target temperature,
857  optionnally using a certain set of restraints only.
858  - scale: scale particle
859  - temperature: target temperature
860  - mc_restraints: optional set of restraints from which the energy should
861  be drawn instead of the energy of the complete system.
862  - floatkey: the floatkey to move.
863  - nm_stepsize: the stepsize of the normal mover
864  Returns: mc instance, nm instance.
865  """
866  nm = self._setup_normal_mover(scale, floatkey, nm_stepsize)
867  mc = self._setup_mc(nm, temperature, mc_restraints)
868  return mc, nm
869 
870  def _mc_and_update_nm(self, nsteps, mc, nm, stats_key,
871  adjust_stepsize=True):
872  """run mc using a normal mover on a single particle,
873  update stepsize and store nsteps, acceptance and stepsize in the
874  statistics instance self.stat by using the key stats_key.
875  """
876  #do the monte carlo
877  mc.optimize(nsteps)
878  naccept = mc.get_number_of_forward_steps()
879  #increment the counter for this simulation
880  self.stat.increment_counter(stats_key, nsteps)
881  #get the acceptance rate, stepsize
882  accept = float(naccept)/nsteps
883  self.stat.update(stats_key, 'acceptance', accept)
884  stepsize = nm.get_sigma()
885  self.stat.update(stats_key, 'stepsize', stepsize)
886  #update stepsize if needed and requested
887  if adjust_stepsize:
888  if 0.4 < accept < 0.6:
889  return
890  if accept < 0.05:
891  accept = 0.05
892  if accept > 1.0:
893  accept = 1.0
894  nm.set_sigma(stepsize*2*accept)
895 
896  def _hmc_and_update_md(self, nsteps, hmc, mv, stats_key,
897  adjust_stepsize=True):
898  """run hmc, update stepsize and print statistics. Updates number of MD
899  steps to reach a target acceptance between 0.4 and 0.6, sends
900  statistics to self.stat. MD steps are always at least 10 and at most 500.
901  """
902  hmc.optimize(nsteps)
903  naccept = hmc.get_number_of_forward_steps()
904  self.stat.increment_counter(stats_key, nsteps)
905  accept = float(naccept)/nsteps
906  self.stat.update(stats_key, 'acceptance', accept)
907  mdsteps = mv.get_number_of_steps()
908  self.stat.update(stats_key, 'n_md_steps', mdsteps)
909  if adjust_stepsize:
910  if 0.4 < accept < 0.6:
911  return
912  mdsteps = int(mdsteps *2**(accept-0.5))
913  if mdsteps > 500:
914  mdsteps = 500
915  if mdsteps < 10:
916  mdsteps = 10
917  mv.set_nsteps(mdsteps)
918 
919  def get_pdb(self, prot):
920  """returns a string corresponding to the pdb structure of hierarchy
921  prot.
922  """
923  output = StringIO()
924  IMP.atom.write_pdb(prot, output)
925  return output.getvalue()
926 
927  def get_netcdf(self, prot):
928  raise NotImplementedError
929 
930  def do_md_protein_statistics(self, md_key, nsteps, md_instance,
931  temperature=300.0, prot_coordinates=None):
932  """updates statistics for md simulation: target temp, kinetic energy,
933  kinetic temperature, writes coordinates and increments counter.
934  - md_key: stats md key
935  - nsteps: number of steps performed.
936  - md_instance: instance of the MolecularDynamics class.
937  - temperature: target temperature
938  - prot_coordinates: protein coordinates to be passed to the stats class,
939  (should be a string)
940  """
941  self.stat.update(md_key, 'target_temp', temperature)
942  kinetic = md_instance.get_kinetic_energy()
943  self.stat.update(md_key, 'E_kinetic', kinetic)
944  self.stat.update(md_key, 'instant_temp',
945  md_instance.get_kinetic_temperature(kinetic))
946  self.stat.update_coordinates(md_key, 'protein', prot_coordinates)
947  self.stat.increment_counter(md_key, nsteps)
948 
949  def init_stats_add_mc_category(self, stat, name='mc', coord='particle'):
950  """shortcut for a frequent series of operations on MC simulations'
951  statistics. Creates an entry for acceptance, stepsize and one
952  coordinate set printed in the statistics file.
953  """
954  #create category
955  mc_key = stat.add_category(name=name)
956  #giving None as argument is a way to create a static entry.
957  stat.add_entry(mc_key, entry=Entry('temperature', '%10G', None))
958  stat.add_entry(mc_key, entry=Entry('acceptance', '%10G', None))
959  stat.add_entry(mc_key, entry=Entry('stepsize', '%10G', None))
960  #special call to add coordinates to be dumped
961  stat.add_entry(mc_key, entry=Entry(coord, '%10G', None))
962  #add the counter to the output
963  stat.add_entry(mc_key, name='counter')
964  return mc_key
965 
966  def init_stats_add_md_category(self, stat, name='md', coord='protein'):
967  """shortcut for a frequent series of operations on MD simulations'
968  statistics. Creates an entry for target temp, instantaneous temp,
969  kinetic energy, and one set of coordinates called 'protein' by
970  default.
971  """
972  #create category
973  md_key = stat.add_category(name=name)
974  #giving None as argument is a way to create a static entry.
975  stat.add_entry(md_key, entry=Entry('target_temp', '%10G', None))
976  stat.add_entry(md_key, entry=Entry('instant_temp', '%10G', None))
977  stat.add_entry(md_key, entry=Entry('E_kinetic', '%10G', None))
978  #special call to add coordinates to be dumped
979  stat.add_coordinates(md_key, coord)
980  #add the counter to the output
981  stat.add_entry(md_key, name='counter')
982  return md_key
983 
984  def init_stats_add_hmc_category(self, stat, name='hmc', coord='protein'):
985  """shortcut for a frequent series of operations on HMC simulations'
986  statistics. Adds acceptance, number of MD steps and a trajectory for
987  a protein.
988  """
989  #create category
990  hmc_key = stat.add_category(name=name)
991  #giving None as argument is a way to create a static entry.
992  stat.add_entry(hmc_key, entry=Entry('temperature', '%10G', None))
993  stat.add_entry(hmc_key, entry=Entry('acceptance', '%10G', None))
994  stat.add_entry(hmc_key, entry=Entry('n_md_steps', '%10G', None))
995  stat.add_entry(hmc_key, entry=Entry('E_kinetic', '%10G', None))
996  #special call to add coordinates to be dumped
997  stat.add_coordinates(hmc_key, coord)
998  #add the counter to the output
999  stat.add_entry(hmc_key, name='counter')
1000  return hmc_key
1001 
1002  def rescale_velocities(self, particles, factor):
1003  """rescale the velocities of a bunch of particles having vx vy and vz
1004  floatkeys
1005  """
1006  keys=[IMP.FloatKey("vx"), IMP.FloatKey("vy"), IMP.FloatKey("vz")]
1007  for p in particles:
1008  for k in keys:
1009  p.set_value(k, p.get_value(k)*factor)