IMP  2.2.0
The Integrative Modeling Platform
peptideDocker.py
1 import IMP
2 import subprocess
3 import random
4 import IMP.domino
5 import IMP.core
6 import IMP.rmf
7 import RMF
8 
9 import time
10 import IMP.algebra
11 import types
12 import re
13 import sys
14 import operator
15 import os
16 import atomicDominoUtilities
17 
18 
19 class PeptideDocker:
20 
21  def __init__(self, parameterFileName):
22  self.createdMdOptimizer = 0
23  self.fixedNonFlexibleAtoms = 0
24  self.readParameters(parameterFileName)
25  self.wroteNativeProtein = 0
26 
27  def loadHelpers(self):
28  self.loadDockingHelpers()
29  self.loadMdHelpers()
30 
31  def readParameters(self, parameterFileName):
32  self.parameters = atomicDominoUtilities.readParameters(
33  parameterFileName)
34 
35  # Processing specific to peptide docking and this script
36  def loadDockingHelpers(self):
37  outputDir = self.getParam("output_directory")
38  mkdirProcess = subprocess.Popen(
39  ['mkdir',
40  '-p',
41  self.getParam('output_directory')],
42  shell=False,
43  stderr=subprocess.PIPE)
44  output = mkdirProcess.communicate()
45 
46  self.namesToParticles = atomicDominoUtilities.makeNamesToParticles(
47  self.protein)
48 
49  self.times = []
50  self.rawTimes = []
51 
52  # Helper method; create dictionary where the keys are names of all
53  # particles in the system and the values are themselves dictionaries
54  def initializeParticleNameDict(self, leaves):
55  particleDict = {}
56 
57  for leaf in leaves:
58  name = self.quickParticleName(leaf)
59  particleDict[name] = {}
60  return particleDict
61 
62  def getModel(self):
63  return self.model
64 
65  def getProtein(self):
66  return self.protein
67 
68  def createModel(self):
69 
70  pdbName = self.getParam("native_pdb_input_file")
71 
72  self.model = IMP.kernel.Model()
73 
74  self.protein = IMP.atom.read_pdb(
75  pdbName,
76  self.model,
78 
79  # atom radii, forcefield topology, etc. -- no restraints created here
80 
82  IMP.atom.get_data_path("par.lib"))
83 
84  topology = ff.create_topology(self.protein)
85  topology.apply_default_patches()
86 
87  topology.add_atom_types(self.protein)
88 
89  ff.add_radii(self.protein)
90  ff.add_well_depths(self.protein)
91  self.ff = ff
92  self.topology = topology
93 
94  # Get parameter value. Throws dictionary exception if not found
95  def getParam(self, paramName):
96  paramValue = self.parameters[paramName]
97  return paramValue
98 
99  # Peptide-docking friendly way to log which restraint we're dealing with
100  def quickRestraintName(self, r):
101  nameList = []
102  ps = r.get_input_particles()
103 
104  for p in ps:
105  if (self.isAtomParticle(p) == 0):
106  continue
107  isFlexible = 0
108  if ((self.quickParticleName(p) in self.flexibleAtoms) == 1):
109  isFlexible = 1
110  nameList.append("%s_%s" % (self.quickParticleName(p), isFlexible))
111 
112  return "%s acting on %s" % (r.get_name(), " , ".join(nameList))
113 
114  def quickParticleName(self, particle):
115  return atomicDominoUtilities.quickParticleName(particle)
116 
117  # output current time
118  def logTime(self, label):
119  nextTime = time.asctime(time.localtime(time.time()))
120  print "%s:\t %s" % (label, nextTime)
121  nextTimeEntry = [label, nextTime]
122  self.times.append(nextTimeEntry)
123  self.rawTimes.append(time.time())
124 
125  # output all times that were saved by logTime()
126  def outputTimes(self):
127  print "Time for each step:"
128  for timeEntry in self.times:
129  label = timeEntry[0]
130  value = timeEntry[1]
131  print "%s:\t%s" % (label, value)
132 
133  def getFlexibleAtoms(self):
134  return self.flexibleAtoms
135 
136  # Helper method to quickly get the chain identifier for a particle
137  def getChain(self, particle):
138  atomDecorator = IMP.atom.Atom.decorate_particle(particle)
139  residue = IMP.atom.get_residue(atomDecorator)
140  chain = IMP.atom.get_chain(residue)
141  chainId = chain.get_id()
142  return chainId
143 
144  # Helper method to quickly get the residue index for a particle
145  def getResidue(self, particle):
146  atomDecorator = IMP.atom.Atom.decorate_particle(particle)
147  residue = IMP.atom.get_residue(atomDecorator)
148  residueNumber = residue.get_index()
149  return residueNumber
150 
151  def isAtomParticle(self, p):
152  return atomicDominoUtilities.isAtomParticle(p)
153 
154  # Read all particles in restraint; return 1 if all are flexible and 0 if
155  # at least one atom is fixed
156  def allFlexibleAtoms(self, r):
157  ps = r.get_input_particles()
158  for p in ps:
159  if (self.isAtomParticle(p) == 0):
160  continue
161  if ((self.quickParticleName(p) in self.flexibleAtoms) == 0):
162  return 0
163  return 1
164 
165  # Check if we should skip processing of this pair of particles. Don't add restraints between particles that aren't optimized,
166  # and don't add restraints between particles on the same residue (for now
167  # assume stereochemistry is good enough for this)
168  def skipNonBondedPair(self, pair):
169 
170  # skip if same residue
171  if (self.getResidue(pair[0]) == self.getResidue(pair[1])):
172  return 1
173 
174  # skip if neither is flexible
175  if ((self.quickParticleName(pair[0]) in self.flexibleAtoms) == 0 and (self.quickParticleName(pair[1]) in self.flexibleAtoms) == 0):
176  return 1
177  return 0
178 
179  # Set up degrees of freedom for the system. Either we are dealing with a fixed protein or a flexible one.
180  # Determine which atoms are flexible and save them in self.flexibleAtoms
181  def initDof(self):
182 
183  peptideChain = self.getParam("peptide_chain")
184  self.flexibleAtoms = {}
185 
186  if (self.getParam("flexible_protein") == "yes"):
187  # All atoms within range of the peptide are flexible; range defined
188  # by close_pair_distance param
189  particlePairs = self.getClosePairs()
190 
191  for pair in particlePairs:
192  p0 = pair[0]
193  p1 = pair[1]
194  p0Chain = self.getChain(p0)
195  p1Chain = self.getChain(p1)
196  if (p0Chain == peptideChain or p1Chain == peptideChain):
197  self.flexibleAtoms[self.quickParticleName(p0)] = 1
198  self.flexibleAtoms[self.quickParticleName(p1)] = 1
199 
200  else:
201  # only peptide atoms are flexible, determined by the chain id
202  leaves = IMP.atom.get_leaves(self.protein)
203  for leaf in leaves:
204  if (self.getChain(leaf) == peptideChain):
205  self.flexibleAtoms[self.quickParticleName(leaf)] = 1
206  self.fixNonFlexibleAtoms()
207 
208  # return a dictionary where the keys are the names of the fixed atoms in
209  # the system
210  def getFixedAtoms(self):
211  leaves = IMP.atom.get_leaves(self.protein)
212  fixedAtoms = {}
213  for leaf in leaves:
214  leafName = self.quickParticleName(leaf)
215  if (leafName in self.flexibleAtoms) == 0:
216  fixedAtoms[leafName] = 1
217  return fixedAtoms
218 
219  # for each flexible atom, give it the coordinates of another flexible atom (ensures that each atom starts out in the binding site)
220  # Although it does guarantee a very random initial configuration, it is
221  # cheating since we are using knowledge of the native state
222  def initializeRandomExisting(self):
223  leaves = IMP.atom.get_leaves(self.protein)
224  particlesToVary = []
225  xyzs = []
226  for leaf in leaves:
227  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 1):
228  particlesToVary.append(self.quickParticleName(leaf))
229  atomToCoordinates = {}
230  for pName in particlesToVary:
231  randomPname = random.choice(particlesToVary)
232  randomP = self.namesToParticles[randomPname]
233  print "setting coordinates for particle %s to the ones currently set for particle %s" % (pName, randomPname)
234  atomToCoordinates[pName] = IMP.core.XYZ.decorate_particle(
235  randomP).get_coordinates()
236 
237  for pName in particlesToVary:
238  particle = self.namesToParticles[pName]
239  xyz = IMP.core.XYZ.decorate_particle(particle)
240  xyz.set_coordinates(atomToCoordinates[pName])
241  xyzs.append(xyz)
242  return xyzs
243 
244  # Set initial positions of flexible atoms in the system. We are
245  # experimenting with the best way to do this
246  def setInitialPositions(self):
247  initialPositionMethod = self.getParam("initial_position_method")
248 
249  # set random coordinates for atom by creating bounding box around native coordinates, increasing it
250  # by defined offset plus random number (necessary when running on
251  # cluster)
252  if (initialPositionMethod == "random"):
253 
254  myRandom = random.random()
255  initialOffset = float(
256  self.getParam("initial_atom_offset")) + myRandom
257  for particleName in self.flexibleAtoms.keys():
258  p = self.namesToParticles[particleName]
259  xyzDecorator = IMP.core.XYZ.decorate_particle(p)
260  coordinates = xyzDecorator.get_coordinates()
261  bb = IMP.algebra.BoundingBox3D(coordinates)
262  bb += initialOffset
263  bb += myRandom
264 
265  randomXyz = IMP.algebra.get_random_vector_in(bb)
266  xyzDecorator.set_coordinates(randomXyz)
267 
268  # for each flexible atom, give it the coordinates of another flexible
269  # atom (ensures that each atom starts out in the binding site)
270  elif (initialPositionMethod == "random_existing"):
271  print "start random existing"
272  xyzs = self.initializeRandomExisting()
273 
274  # same as random_existing except offset each atom a little (doesn't
275  # seem to make too much of a difference)
276  elif (initialPositionMethod == "random_existing_box"):
277  xyzs = self.initializeRandomExisting()
278  initialOffset = float(self.getParam("initial_atom_offset"))
279  for xyzDecorator in xyzs:
280  coordinates = xyzDecorator.get_coordinates()
281  bb = IMP.algebra.BoundingBox3D(coordinates)
282  bb += initialOffset
283 
284  randomXyz = IMP.algebra.get_random_vector_in(bb)
285  xyzDecorator.set_coordinates(randomXyz)
286 
287  elif (initialPositionMethod == "random_box"):
288 
289  # Get bounding box containing all particles within close pair distance of flexible atoms
290  # This doesn't work perfectly; can put initial positions in the fixed atoms accidentally
291  # As constructed it is specific to peptide atoms only and not
292  # flexible protein atoms
293  bsParticles = []
294 
295  for firstPname in self.nonBondedPairs.keys():
296  secondPnames = self.nonBondedPairs[firstPname]
297  for secondPname in secondPnames.keys():
298  if ((secondPname in self.flexibleAtoms) == 0):
299  fixedAtom = self.namesToParticles[firstPname]
300 
301  bsParticles.append(
303  print "added next close atom %s to bounding box particles" % firstPname
304  # bsBoundingBox = IMP.core.get_bounding_box(bsParticles) -- revert
305  # if I can figure out how
306  peptideParticles = []
307  leaves = IMP.atom.get_leaves(self.protein)
308  for leaf in leaves:
309  if (self.getChain(leaf) == self.getParam("peptide_chain")):
310  peptideParticles.append(
312  bsBoundingBox = IMP.core.get_bounding_box(peptideParticles)
313  # set initial position for each flexible atoms, and adjust it if it
314  # is too close to a fixed residue
315  fixedAtoms = self.getFixedAtoms()
316  for flexPname in self.flexibleAtoms.keys():
317  goodPosition = 0
318  flexParticle = self.namesToParticles[flexPname]
319  flexXyzDecorator = IMP.core.XYZ.decorate_particle(flexParticle)
320  print "processing position for pname %s" % flexPname
321  while(goodPosition == 0):
322  goodPosition = 1
323  flexXyzDecorator.set_coordinates(
324  IMP.algebra.get_random_vector_in(bsBoundingBox))
325  for fixedPname in fixedAtoms.keys():
326  fixedParticle = self.namesToParticles[fixedPname]
327  distance = IMP.algebra.get_distance(
329  fixedParticle).get_coordinates(
330  ),
331  IMP.core.XYZ.decorate_particle(flexParticle).get_coordinates())
332  if (distance < 2):
333  print "finding new position for %s" % flexPname
334  goodPosition = 0
335  # start the while loop over which will set new
336  # coordinates
337  break
338 
339  # take a box around the whole protein and randomly initialize atoms
340  # here
341  elif (initialPositionMethod == "random_full"):
342  bb = IMP.atom.get_bounding_box(self.protein)
343  for particleName in self.flexibleAtoms.keys():
344  print "creating random position for particle %s" % particleName
345  p = self.namesToParticles[particleName]
346  xyzDecorator = IMP.core.XYZ.decorate_particle(p)
347  randomXyz = IMP.algebra.get_random_vector_in(bb)
348  myRandom = random.random()
349  bb += myRandom
350  xyzDecorator.set_coordinates(randomXyz)
351 
352  # Read in a file generated from a previous run (fastest way is to
353  # create a new model and copy corresponding XYZ Decorators)
354  elif (initialPositionMethod == "file"):
355 
356  initialPositionFile = self.getParam("saved_initial_atom_positions")
357  print "reading initial positions from %s" % initialPositionFile
358  initialModel = IMP.kernel.Model()
359  initialProtein = IMP.atom.read_pdb(
360  initialPositionFile,
361  initialModel,
363  initialLeaves = IMP.atom.get_leaves(initialProtein)
364  for initialLeaf in initialLeaves:
365  name = self.quickParticleName(initialLeaf)
366  if ((name in self.namesToParticles) == 1):
367  existingLeaf = self.namesToParticles[name]
368  existingLeafXyz = IMP.core.XYZ.decorate_particle(
369  existingLeaf)
370  initialLeafXyz = IMP.core.XYZ.decorate_particle(
371  initialLeaf)
372  existingLeafXyz.set_coordinates(
373  initialLeafXyz.get_coordinates())
374  else:
375  print "Read in initial positions from file %s but this file contained particle %s which is not in current model" % (initialPositionFile, name)
376  else:
377  print "Please specify valid initial position method (random or file)\n"
378  sys.exit()
379 
380  # output initial positions
381  outputDir = self.getParam("output_directory")
382  initialAtomPositionFile = self.getParam(
383  "initial_atom_positions_output_file")
384  fullOutputFile = os.path.join(outputDir, initialAtomPositionFile)
385  print "writing output to file %s" % fullOutputFile
386  IMP.atom.write_pdb(self.protein, fullOutputFile)
387 
388  # get initial score for model
389  initialScore = self.model.evaluate(False)
390  print "initial score for model is %s" % initialScore
391 
392  # Get non-bonded pairs in the system in preparation for restraining them
393  def getNonBondedPairs(self):
394  nonBondedDefinition = self.getParam("non_bonded_definition")
395  particlePairs = []
396  if (nonBondedDefinition == "close_pairs"):
397  particlePairs = self.getClosePairs()
398  elif (nonBondedDefinition == "random"):
399  particlePairs = self.getRandomParticlePairs()
400  elif (nonBondedDefinition == "manual"):
401  particlePairs = self.readManualRestraints(nativeProtein, 6)
402  else:
403  print "Please specify valid non bonded pair definition"
404  sys.exit()
405  return particlePairs
406 
407  # Get 3D distance between the particles in the pair
408  def getXyzDistance(self, pair):
409  d0 = IMP.core.XYZ.decorate_particle(pair[0])
410  d1 = IMP.core.XYZ.decorate_particle(pair[1])
411  distance = IMP.core.get_distance(d0, d1)
412  return distance
413 
414  # Get all pairs of particles within the distance defined by
415  # close_pair_distance parameter
416  def getClosePairs(self):
417 
418  # Get a list of all atoms in the protein, and put it in a container
419  leaves = IMP.atom.get_leaves(self.protein)
421  closePairDistance = float(self.getParam("close_pair_distance"))
422  # sometimes closePairContainer returns particles > distanceCutoff, even
423  # with slack = 0
424  useHardCutoff = self.getParam("use_hard_distance_cutoff")
425 
426  nbl = IMP.container.ClosePairContainer(cont, closePairDistance, 0.0)
427 
428  self.model.evaluate(False)
429 
430  particlePairs = nbl.get_particle_pairs()
431  finalPairs = []
432  for pair in particlePairs:
433  distance = self.getXyzDistance(pair)
434  q0 = self.quickParticleName(pair[0])
435  q1 = self.quickParticleName(pair[1])
436 
437  if (useHardCutoff == "0" or (useHardCutoff == "1" and distance < closePairDistance)):
438  finalPairs.append([pair[0], pair[1]])
439 
440  return finalPairs
441 
442  # Add restraints for bond lengths, angles, dihedrals, and impropers
443  def addForceFieldRestraints(self):
444 
445  useForcefieldRestraints = self.getParam("use_forcefield_restraints")
446  if (useForcefieldRestraints == "1"):
447 
449  IMP.atom.get_data_path("top_heav.lib"),
450  IMP.atom.get_data_path("par.lib"))
451 
452  # Set up and evaluate the stereochemical part (bonds, angles, dihedrals,
453  # impropers) of the CHARMM forcefield
454  bonds = self.topology.add_bonds(self.protein)
455  angles = ff.create_angles(bonds)
456  dihedrals = ff.create_dihedrals(bonds)
457  impropers = self.topology.add_impropers(self.protein)
458 
459  # tracks which particles are together in forcefield restraints
460  # (useful for filtering excluded volume)
461  self.ffParticleGroups = self.initializeParticleNameDict(
462  IMP.atom.get_leaves(self.protein))
463 
464  self.createForceFieldRestraintsForModel(
465  bonds,
466  "bonds",
467  "Bond Restraints",
470  0,
471  1)))
472  self.createForceFieldRestraintsForModel(
473  angles,
474  "angles",
475  "Angle Restraints",
478  0,
479  1)))
480  self.createForceFieldRestraintsForModel(
481  dihedrals,
482  "dihedrals",
483  "Dihedral Restraints",
485  self.createForceFieldRestraintsForModel(
486  impropers,
487  "impropers",
488  "Improper Restraints",
491  0,
492  1)))
493 
494  else:
495  print ("Skipping forcefield restraints")
496 
497  # Create one type of forcefield restraint. The restraint is initially across the full model and needs to be decomposed
498  # so it can be used in the restraint graph.
499  def createForceFieldRestraintsForModel(
500  self,
501  ffParticles,
502  containerName,
503  restraintName,
504  singletonScore):
505 
506  maxForceFieldScore = float(self.getParam("max_forcefield_score"))
507  cont = IMP.container.ListSingletonContainer(self.model, containerName)
508  cont.add_particles(ffParticles)
509  listBondRestraint = IMP.container.SingletonsRestraint(
510  singletonScore, cont, restraintName)
511  self.model.add_restraint(listBondRestraint)
512 
513  # Restraint decomposition has been a little touchy; a couple
514  # workarounds contained
515  decomposedRestraintTemp = listBondRestraint.create_decomposition()
516  decomposedRestraints = IMP.kernel.RestraintSet.get_from(
517  decomposedRestraintTemp)
518  rs_restraints = decomposedRestraints.get_restraints()
519 
520  # manually remove the full restraint after getting the decomposition
521  self.model.remove_restraint(listBondRestraint)
522 
523  count = decomposedRestraints.get_number_of_restraints()
524  for i in range(count):
525 
526  r = decomposedRestraints.get_restraint(i)
527 
528  if (self.allFlexibleAtoms(r) == 1):
529  # add the decomposed restraint to the model
530  self.model.add_restraint(r)
531  self.model.set_maximum_score(r, maxForceFieldScore)
532  self.addParticlesToGroups(r)
533  else:
534  t = 1
535  # self.model.remove_restraint(r)
536 
537  # Add all atoms in r to ffParticleGroups; tracks which atoms are restrained by forcefield terms so
538  # we don't restrain them in other methods (e.g. excluded volume) --
539  # essentially a manual pair-filter
540  def addParticlesToGroups(self, r):
541  inputParticles = []
542  ps = r.get_input_particles()
543 
544  for p in ps:
545 
546  if (self.isAtomParticle(p) == 0): # skip particles that aren't actually atoms (e.g. bond particles)
547  continue
548  inputParticles.append(p.get_name())
549 
550  for firstName in inputParticles:
551  for secondName in inputParticles:
552  if (firstName == secondName):
553  continue
554  self.ffParticleGroups[firstName][secondName] = 1
555 
556  # Add excluded volume restraints across close pairs. Still a work in progress as they have been slowing down
557  # the code quite a bit and we'll probably want to turn them on only at a
558  # defined time
559  def addExcludedVolumeRestraints(self):
560 
561  restrainedParticles = atomicDominoUtilities.getRestrainedParticles(
562  self.protein, self.model, self.namesToParticles)
563  lsc = IMP.container.ListSingletonContainer(restrainedParticles)
564 
565  evr = IMP.core.ExcludedVolumeRestraint(lsc, 1, 1)
566  self.model.add_restraint(evr)
567 
568  # Add lennard jones restraints between flexible atoms if they haven't been
569  # added already, and then scale radii of flexible atoms by input scaling
570  # factor
571  def addFlexFlexLjRestraints(self, scalingFactor):
572  if (self.initializedFlexFlexLjRestraints == 0):
573  print "adding initial lennard jones restraints between pairs of flexible atoms"
574  self.initializedFlexFlexLjRestraints = 1
575  leaves = IMP.atom.get_leaves(self.protein)
576  counter = 0
577 
578  sf = IMP.atom.ForceSwitch(6.0, 7.0)
580  for i in range(len(leaves)):
581  iLeaf = leaves[i]
582  for j in range(i + 1, len(leaves)):
583  jLeaf = leaves[j]
584  if ((self.quickParticleName(iLeaf) in self.flexibleAtoms) == 1 and (self.quickParticleName(jLeaf) in self.flexibleAtoms) == 1):
585  ljpr = IMP.core.PairRestraint(
586  ps, [iLeaf, jLeaf], "LennardJones %s_%s" %
587  (i, j))
588  self.model.add_restraint(ljpr)
589  chainHierarchies = IMP.atom.get_by_type(
590  self.protein, IMP.atom.CHAIN_TYPE)
591  peptideChainHierarchy = None
592  print "scaling atomic radii by scaling factor %s" % scalingFactor
593  # reset radii back to original in preparation for rescaling
594  self.ff.add_radii(self.protein)
595  for pName in self.flexibleAtoms.keys():
596  particle = self.namesToParticles[pName]
597  xyzr = IMP.core.XYZR.decorate_particle(particle)
598  radius = xyzr.get_radius()
599  radius *= scalingFactor
600  xyzr.set_radius(radius)
601 
602  # add lennard-jones restraints between all fixed atoms and all flexible
603  # atoms
604  def addFixedFlexibleLjRestraints(self):
605  print "adding initial fixed flexible lj restraints"
606  fixedAtoms = self.getFixedAtoms()
607  sf = IMP.atom.ForceSwitch(6.0, 7.0)
609 
610  for flexPname in self.flexibleAtoms.keys():
611  flexParticle = self.namesToParticles[flexPname]
612  for fixedPname in fixedAtoms.keys():
613  fixedParticle = self.namesToParticles[fixedPname]
614  ljpr = IMP.core.PairRestraint(
615  ps, [fixedParticle, flexParticle], "LennardJones %s_%s" %
616  (flexPname, fixedPname))
617  self.model.add_restraint(ljpr)
618 
619  # actually do the work to create the DOPE restraint
620  def createDopeRestraint(self, pair):
621  q0 = self.quickParticleName(pair[0])
622  q1 = self.quickParticleName(pair[1])
623  # create restraint around this distance, add to model
624  prName = "NonBonded %s_%s" % (q0, q1)
625  pairRestraint = IMP.core.PairRestraint(
626  self.dopePairScore, pair, prName)
627 
628  # print "Added non-bonded restraint between %s and %s" % (q0, q1)
629  self.model.add_restraint(pairRestraint)
630  maxNonBondedScore = float(self.getParam("max_non_bonded_score"))
631  self.model.set_maximum_score(pairRestraint, maxNonBondedScore)
632  #initialScore = pairRestraint.evaluate(0)
633 
634  # add non-bonded restraints across close pairs. At least one atom in each
635  # pair should be flexible
636  def addClosePairNonBondedRestraints(self):
637  particlePairs = self.getNonBondedPairs()
638 
639  nonBondedRestraintType = self.getParam("non_bonded_restraint_type")
640  dopeCutoff = float(self.getParam("dope_cutoff"))
641 
643 
644  ps = 0
645  if (nonBondedRestraintType == "dope"):
646  IMP.atom.add_dope_score_data(self.protein)
647  ps = IMP.atom.DopePairScore(dopeCutoff)
648  self.dopePairScore = ps
649  nonBondedCount = 0
650  self.nonBondedPairs = self.initializeParticleNameDict(
651  IMP.atom.get_leaves(self.protein))
652  for pair in particlePairs:
653 
654  if (self.skipNonBondedPair(pair) == 1):
655  continue
656 
657  self.createDopeRestraint(pair)
658 
659  nonBondedCount += 1
660  self.nonBondedPairs[self.quickParticleName(pair[0])][
661  self.quickParticleName(pair[0])] = 1
662  self.nonBondedPairs[self.quickParticleName(pair[1])][
663  self.quickParticleName(pair[1])] = 1
664 
665  print "Added %s non-bonded restraints" % nonBondedCount
666  self.model.evaluate(False)
667 
668  def addCompleteNonBondedRestraints(self):
669  leaves = IMP.atom.get_leaves(self.protein)
670  counter = 0
671 
672  for i in range(len(leaves)):
673  iLeaf = leaves[i]
674  for j in range(i + 1, len(leaves)):
675  jLeaf = leaves[j]
676 
677  if ((self.quickParticleName(jLeaf) in self.nonBondedPairs[self.quickParticleName(iLeaf)]) == 1):
678  continue
679  if (self.skipNonBondedPair([iLeaf, jLeaf]) == 1):
680  continue
681 
682  self.createDopeRestraint([iLeaf, jLeaf])
683  counter += 1
684  # print "added complete restraint between %s and %s" %
685  # (self.quickParticleName(iLeaf),
686  # self.quickParticleName(jLeaf))
687  print "added %s restraints for %s particles" % (counter, len(leaves))
688  self.model.evaluate(False)
689 
690  # lots of output on all restraints
691  def writeAllRestraints(self):
692  print "write all restraints:"
693  for r in IMP.get_restraints([self.model.get_root_restraint_set()]):
694  print self.quickRestraintName(r)
695 
696  # Read parameterss with md temperature / step schedule and save them
697  def initializeMdSchedule(self):
698  schedule = self.getParam("md_schedule")
699  stages = schedule.split()
700  totalSteps = 0
701  for stage in stages:
702  print "unpacking stage %s" % stage
703  [steps, temperature, excludedVolume] = stage.split('_')
704  totalSteps += int(steps)
705  self.totalMdSteps = totalSteps
706  self.mdStages = stages
707 
708  # Creates roothandle for hdf5 objects interacting with md trajectory
709  # Either we create a new trajectory in this file and read it back in the
710  # same run, or we are reading a saved trajectory
711  def createMdRootHandle(self):
712 
713  mdFileName = ""
714  outputDir = self.getParam("output_directory")
715 
716  if (self.getParam("read_assignments_from_file") == "no"):
717  # create new hdf5 file to which md output will be written
718  mdFileName = self.getParam("md_trajectory_output_file")
719  fileName = os.path.join(outputDir, mdFileName)
720  print "creating rmf file with filename %s" % fileName
721  rh = RMF.create_rmf_file(fileName)
722  my_kc = rh.add_category("my data")
723  IMP.rmf.add_hierarchy(rh, self.protein)
724 
725  self.rootHandle = rh
726  else:
727  # use existing hdf5 file
728  mdFileName = self.getParam("saved_md_filename")
729 
730  rh = RMF.open_rmf_file(mdFileName)
731  IMP.rmf.set_hierarchies(rh, [self.protein])
732 
733  self.rootHandle = rh
734 
735  def evaluateModel(self):
736  return self.model.evaluate(False)
737 
738  def loadMdHelpers(self):
739 
740  self.createMdRootHandle()
741 
742  self.createMdOptimizer()
743 
744  self.addMdOptimizerStates()
745 
746  self.initializeMdSchedule()
747 
748  self.createdMdOptimizer = 1
749 
750  # Create the optimizer object for running MD
751  def createMdOptimizer(self):
752  print "creating md object"
753  md = IMP.atom.MolecularDynamics(self.model)
754  # hack to get around bug where vx attributes in non-optimized particles
755  # are not being set
756  md.optimize(1)
757  md.set_velocity_cap(100.0)
758  self.mdOptimizer = md
759 
760  def addMdOptimizerStates(self):
761 
762  # VelocityScalingOptimizerState
764  IMP.atom.get_leaves(self.protein),
765  0) # temperature is set later
766  vsIndex = self.mdOptimizer.add_optimizer_state(vsos)
767 
768  # SaveHierarchyConfigurationOptimizerState
769  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState(
770  [self.protein], self.rootHandle)
771  self.mdOptimizer.add_optimizer_state(hdos)
772 
773  self.hdos = hdos
774  self.vsos = vsos
775 
776  def readTrajectories(self):
777 
778  cgFileName = self.getParam("cg_output_file")
779  bestCgScore = 10000000
780  bestCgScoreFile = ""
781  bestCgRmsd = 10000000
782  bestCgRmsdFile = ""
783 
784  outputDir = self.getParam("output_directory")
785  trajectoryFile = self.getParam("md_trajectory_output_file")
786  fullFile = os.path.join(outputDir, trajectoryFile)
787  rh = RMF.open_rmf_file(fullFile)
788  IMP.rmf.set_hierarchies(rh, [self.protein])
789  framesToRead = atomicDominoUtilities.getMdIntervalFrames(
790  rh, int(self.getParam("cg_interval")), self.protein)
791 
792  if (len(framesToRead) > 0):
793  for cgNumber in framesToRead:
794  # Open next cg trajectory
795  outputDir = self.getParam("output_directory")
796  fullCgFileName = os.path.join(
797  outputDir, "%s%s" %
798  (cgFileName, cgNumber))
799  rh = RMF.open_rmf_file(fullCgFileName)
800  IMP.rmf.set_hierarchies(rh, [self.protein])
801 
802  frameCount = IMP.rmf.get_number_of_frames(rh, self.protein)
803  IMP.rmf.load_frame(rh, frameCount - 1, self.protein)
804  score = self.model.evaluate(False)
805  rmsd = self.calculateNativeRmsd(self.flexibleAtoms)
806  print "cg number %s has score %s rmsd %s" % (cgNumber, score, rmsd)
807  if (score < bestCgScore):
808  bestCgScore = score
809  bestCgScoreFile = fullCgFileName
810  if (rmsd < bestCgRmsd):
811  bestCgRmsd = rmsd
812  bestCgRmsdFile = fullCgFileName
813 
814  # output best score information
815  self.singlePdbResults(
816  bestCgScoreFile,
817  -1,
818  self.getParam("best_cg_score_output_file"))
819  self.singlePdbResults(
820  bestCgRmsdFile,
821  -1,
822  self.getParam("best_cg_rmsd_output_file"))
823  self.singlePdbResults(
824  "%s%s" %
825  (cgFileName, framesToRead[-1]), -1, self.getParam("final_cg_frame_output_file"))
826  finalCgRmsd = self.calculateNativeRmsd(self.flexibleAtoms)
827  self.bestCgScore = bestCgScore
828  self.bestCgRmsd = bestCgRmsd
829  self.finalCgRmsd = finalCgRmsd
830 
831  def singlePdbResults(self, trajectoryFile, frame, outputPdbFile):
832 
833  fullTrajectoryFile = os.path.join(
834  self.getParam("output_directory"),
835  trajectoryFile)
836  fullOutputFile = os.path.join(
837  self.getParam("output_directory"),
838  outputPdbFile)
839  rh = RMF.open_rmf_file(fullTrajectoryFile)
840  IMP.rmf.set_hierarchies(rh, [self.protein])
841  if (frame == -1):
842  frame = IMP.rmf.get_number_of_frames(rh, self.protein) - 1
843  IMP.rmf.load_frame(rh, frame, self.protein)
844  IMP.atom.write_pdb(self.protein, fullOutputFile)
845 
846  def calculateRmsd(self, otherProtein, flexibleAtoms):
847  otherNamesToParticles = atomicDominoUtilities.makeNamesToParticles(
848  otherProtein)
849  otherVector = []
850  modelVector = []
851  for pName in otherNamesToParticles.keys():
852  if ((pName in flexibleAtoms) == 0):
853  continue
854  otherParticle = otherNamesToParticles[pName]
855  modelParticle = self.namesToParticles[pName]
856  otherVector.append(
858  otherParticle).get_coordinates(
859  ))
860  modelVector.append(
862  modelParticle).get_coordinates(
863  ))
864  rmsd = IMP.atom.get_rmsd(otherVector, modelVector)
865  return rmsd
866 
867  def calculateNativeRmsd(self, flexibleAtoms):
868 
869  if (self.wroteNativeProtein == 0):
870  pdbName = self.getParam("native_pdb_input_file")
871  self.nativeModel = IMP.kernel.Model()
872  self.nativeProtein = IMP.atom.read_pdb(
873  pdbName,
874  self.nativeModel,
876  self.wroteNativeProtein = 1
877 
878  return self.calculateRmsd(self.nativeProtein, flexibleAtoms)
879 
880  # Run MD according to the temperature schedule read in from parameters
881  def runMolecularDynamics(self):
882  self.initializedFlexFlexLjRestraints = 0
883  if (self.fixedNonFlexibleAtoms == 0):
884  print "error: before running md, must fix non flexible atoms"
885  sys.exit()
886  stepsSoFar = 0
887  for stage in self.mdStages:
888  [steps, temperature, excludedVolume] = stage.split('_')
889  stepsSoFar += int(steps)
890  self.vsos.set_temperature(int(temperature))
891  if (excludedVolume == "1"): # add lennard jones restraints between flexible atoms and fixed atoms (don't add between flex atoms)
892  self.addFixedFlexibleLjRestraints()
893  elif (excludedVolume != "0"):
894  # add lennard jones restraints between flexible atoms
895  if (self.getParam("excluded_volume_type") == "lj"):
896  self.addFlexFlexLjRestraints(float(excludedVolume))
897  elif (self.getParam("excluded_volume_type") == "ev"):
898  self.addExcludedVolumeRestraints(
899  ) # add full excluded volume restraints (just take everything in the protein)
900  else:
901  "please set excluded_volume_type to either lj or ev"
902  sys.exit()
903  print "running md at temperature %s for %s steps" % (temperature, steps)
904  self.mdOptimizer.optimize(int(steps))
905 
907  self.protein,
908  os.path.join(
909  self.getParam(
910  "output_directory"),
911  "md_after_%s.pdb" %
912  stepsSoFar))
913  print "model score after stage %s is %s" % (stage, self.model.evaluate(False))
914  print "done running md"
915 
916  def runAllCg(self):
917  cgInterval = int(self.getParam("cg_interval"))
918  outputDir = self.getParam("output_directory")
919  trajectoryFile = self.getParam("md_trajectory_output_file")
920  fullFile = os.path.join(outputDir, trajectoryFile)
921  print "open rmf %s" % fullFile
922  rh = RMF.open_rmf_file(fullFile)
923  IMP.rmf.set_hierarchies(rh, [self.protein])
924  framesToRead = atomicDominoUtilities.getMdIntervalFrames(
925  rh, cgInterval, self.protein)
926 
927  for i in framesToRead:
928  cgFileName = "%s%s" % (self.getParam("cg_output_file"), i)
929  self.applyCg(i, cgFileName)
930 
931  # Apply CG to each step of the saved MD trajectory. Writes results to same
932  # file storing the MD trajectory
933  def applyCg(self, mdStepStart, cgFileName):
934 
935  cgSteps = int(self.getParam("cg_steps"))
936 
937  # load md step specified by calling object
938  print "apply cg: loading md frame for cg step start %s" % mdStepStart
939  IMP.rmf.load_frame(self.rootHandle, mdStepStart, self.protein)
940 
941  # create new hdf5 file to which cg output will be written
942  outputDir = self.getParam("output_directory")
943  fileName = os.path.join(outputDir, cgFileName)
944  print "creating cg hdf5 file %s" % fileName
945  rh = RMF.create_rmf_file(fileName)
946  my_kc = rh.add_category("my data")
947  IMP.rmf.add_hierarchy(rh, self.protein)
948 
949  # apply cg
950  cg = IMP.core.ConjugateGradients(self.model)
951  firstScore = self.model.evaluate(False)
952 
953  print "creating optimizer state"
954  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState(
955  [self.protein], rh)
956  hdos.set_skip_steps(0)
957  # hdos is the optimizer state writing configurations to disk
958  cg.add_optimizer_state(hdos)
959 
960  print "running cg"
961  cg.optimize(cgSteps)
962  secondScore = self.model.evaluate(False)
963  print "cg score after md step %s before %s after %s" % (mdStepStart, firstScore, secondScore)
964  return secondScore
965 
966  # Tell the optimizer not to move atoms we have determined ar fixed
967  def fixNonFlexibleAtoms(self):
968  if (self.createdMdOptimizer == 0):
969  self.createMdOptimizer()
970  # print "ERROR: must create md optimizer and optimize once before set_particles_are_optimized is called"
971  # sys.exit()
972 
973  self.fixedNonFlexibleAtoms = 1
974  leaves = IMP.atom.get_leaves(self.protein)
975  for leaf in leaves:
976  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 0):
977  xyzDecorator = IMP.core.XYZ.decorate_particle(leaf)
978  xyzDecorator.set_coordinates_are_optimized(0)
979 
980  def writeOutput(self):
981  restraintCount = self.model.get_number_of_restraints()
982  leaves = IMP.atom.get_leaves(self.protein)
983  flexiblePeptideAtoms = 0
984  flexibleProteinAtoms = 0
985  fixedPeptideAtoms = 0
986  fixedProteinAtoms = 0
987 
988  for leaf in leaves:
989  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 0):
990  if (self.getChain(leaf) == self.getParam("peptide_chain")):
991  fixedPeptideAtoms += 1
992  else:
993  fixedProteinAtoms += 1
994  else:
995  if (self.getChain(leaf) == self.getParam("peptide_chain")):
996  flexiblePeptideAtoms += 1
997  else:
998  flexibleProteinAtoms += 1
999  print "Flexible peptide atoms: %s" % flexiblePeptideAtoms
1000  print "Fixed peptide atoms: %s" % fixedPeptideAtoms
1001  print "Flexible protein atoms: %s" % flexibleProteinAtoms
1002  print "Fixed protein atoms: %s" % fixedProteinAtoms
1003  print "Total number of restraints: %s" % restraintCount
1004 
1005  def writeOsOutput(self):
1006 
1007  print "Best cg score: %s" % self.bestCgScore
1008  print "best cg rmsd: %s" % self.bestCgRmsd
1009  print "final cg rmsd: %s" % self.finalCgRmsd
Applies a SingletonScore to each Singleton in a list.
Chain get_chain(Hierarchy h)
void write_pdb(const Selection &mhd, base::TextOutput out, unsigned int model=1)
Score the angle based on a UnaryFunction,.
static XYZR decorate_particle(::IMP::kernel::Particle *p)
Definition: XYZR.h:47
Simple conjugate gradients optimizer.
algebra::BoundingBoxD< 3 > get_bounding_box(const Hierarchy &h)
Get a bounding box for the Hierarchy.
void load_frame(RMF::FileConstHandle file, unsigned int frame)
Definition: frames.h:27
Hierarchy get_residue(Hierarchy mhd, unsigned int index)
Get the residue with the specified index.
Return all close unordered pairs of particles taken from the SingletonContainer.
double get_rmsd(const core::XYZs &s0, const core::XYZs &s1, const IMP::algebra::Transformation3D &tr_for_second)
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:88
Vector3D get_random_vector_in(const Cylinder3D &c)
Generate a random vector in a cylinder with uniform density.
CHARMM force field parameters.
static Float get_k_from_standard_deviation(Float sd, Float t=297.15)
Return the k to use for a given Gaussian standard deviation.
Definition: core/Harmonic.h:62
Maintains temperature during molecular dynamics by velocity scaling.
Select all non-alternative ATOM records.
Definition: pdb.h:63
void add_hierarchy(RMF::FileHandle fh, atom::Hierarchy hs)
Lennard-Jones score between a pair of particles.
Simple molecular dynamics optimizer.
Store a kernel::ParticleIndexes.
static Atom decorate_particle(::IMP::kernel::Particle *p)
Definition: atom/Atom.h:241
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
algebra::BoundingBoxD< 3 > get_bounding_box(const XYZRs &ps)
Get the bounding box.
Score the improper dihedral based on a UnaryFunction,.
Score the bond based on a UnaryFunction,.
See IMP.core for more information.
See IMP.algebra for more information.
Prevent a set of particles and rigid bodies from inter-penetrating.
void add_dope_score_data(atom::Hierarchy h)
static XYZ decorate_particle(::IMP::kernel::Particle *p)
Definition: XYZ.h:49
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
compute the distance between two vectors
Definition: VectorD.h:209
Applies a PairScore to a Pair.
Definition: PairRestraint.h:29
std::string get_data_path(std::string file_name)
Return the full path to installed data.
Smooth interaction scores by switching the derivatives (force switch).
void read_pdb(base::TextInput input, int model, Hierarchy h)
Hierarchies get_leaves(const Selection &h)
See IMP.rmf for more information.
Definition: associations.h:20
See IMP.domino for more information.
Definition: analysis.h:15
Class for storing model, its restraints, constraints, and particles.
Definition: kernel/Model.h:72
Harmonic function (symmetric about the mean)
Definition: core/Harmonic.h:24