IMP  2.3.1
The Integrative Modeling Platform
peptideDocker.py
1 import IMP
2 import subprocess
3 import random
4 import IMP.domino
5 import IMP.core
6 import IMP.rmf
7 import RMF
8 
9 import time
10 import IMP.algebra
11 import types
12 import re
13 import sys
14 import operator
15 import os
16 import atomicDominoUtilities
17 
18 
19 class PeptideDocker:
20 
21  def __init__(self, parameterFileName):
22  self.createdMdOptimizer = 0
23  self.fixedNonFlexibleAtoms = 0
24  self.readParameters(parameterFileName)
25  self.wroteNativeProtein = 0
26 
27  def loadHelpers(self):
28  self.loadDockingHelpers()
29  self.loadMdHelpers()
30 
31  def readParameters(self, parameterFileName):
32  self.parameters = atomicDominoUtilities.readParameters(
33  parameterFileName)
34 
35  # Processing specific to peptide docking and this script
36  def loadDockingHelpers(self):
37  outputDir = self.getParam("output_directory")
38  mkdirProcess = subprocess.Popen(
39  ['mkdir',
40  '-p',
41  self.getParam('output_directory')],
42  shell=False,
43  stderr=subprocess.PIPE)
44  output = mkdirProcess.communicate()
45 
46  self.namesToParticles = atomicDominoUtilities.makeNamesToParticles(
47  self.protein)
48 
49  self.times = []
50  self.rawTimes = []
51 
52  # Helper method; create dictionary where the keys are names of all
53  # particles in the system and the values are themselves dictionaries
54  def initializeParticleNameDict(self, leaves):
55  particleDict = {}
56 
57  for leaf in leaves:
58  name = self.quickParticleName(leaf)
59  particleDict[name] = {}
60  return particleDict
61 
62  def getModel(self):
63  return self.model
64 
65  def getProtein(self):
66  return self.protein
67 
68  def createModel(self):
69 
70  pdbName = self.getParam("native_pdb_input_file")
71 
72  self.model = IMP.kernel.Model()
73 
74  self.protein = IMP.atom.read_pdb(
75  pdbName,
76  self.model,
78 
79  # atom radii, forcefield topology, etc. -- no restraints created here
80 
82  IMP.atom.get_data_path("par.lib"))
83 
84  topology = ff.create_topology(self.protein)
85  topology.apply_default_patches()
86 
87  topology.add_atom_types(self.protein)
88 
89  ff.add_radii(self.protein)
90  ff.add_well_depths(self.protein)
91  self.ff = ff
92  self.topology = topology
93 
94  # Get parameter value. Throws dictionary exception if not found
95  def getParam(self, paramName):
96  paramValue = self.parameters[paramName]
97  return paramValue
98 
99  # Peptide-docking friendly way to log which restraint we're dealing with
100  def quickRestraintName(self, r):
101  nameList = []
102  ps = r.get_input_particles()
103 
104  for p in ps:
105  if (self.isAtomParticle(p) == 0):
106  continue
107  isFlexible = 0
108  if ((self.quickParticleName(p) in self.flexibleAtoms) == 1):
109  isFlexible = 1
110  nameList.append("%s_%s" % (self.quickParticleName(p), isFlexible))
111 
112  return "%s acting on %s" % (r.get_name(), " , ".join(nameList))
113 
114  def quickParticleName(self, particle):
115  return atomicDominoUtilities.quickParticleName(particle)
116 
117  # output current time
118  def logTime(self, label):
119  nextTime = time.asctime(time.localtime(time.time()))
120  print "%s:\t %s" % (label, nextTime)
121  nextTimeEntry = [label, nextTime]
122  self.times.append(nextTimeEntry)
123  self.rawTimes.append(time.time())
124 
125  # output all times that were saved by logTime()
126  def outputTimes(self):
127  print "Time for each step:"
128  for timeEntry in self.times:
129  label = timeEntry[0]
130  value = timeEntry[1]
131  print "%s:\t%s" % (label, value)
132 
133  def getFlexibleAtoms(self):
134  return self.flexibleAtoms
135 
136  # Helper method to quickly get the chain identifier for a particle
137  def getChain(self, particle):
138  atomDecorator = IMP.atom.Atom(particle)
139  residue = IMP.atom.get_residue(atomDecorator)
140  chain = IMP.atom.get_chain(residue)
141  chainId = chain.get_id()
142  return chainId
143 
144  # Helper method to quickly get the residue index for a particle
145  def getResidue(self, particle):
146  atomDecorator = IMP.atom.Atom(particle)
147  residue = IMP.atom.get_residue(atomDecorator)
148  residueNumber = residue.get_index()
149  return residueNumber
150 
151  def isAtomParticle(self, p):
152  return atomicDominoUtilities.isAtomParticle(p)
153 
154  # Read all particles in restraint; return 1 if all are flexible and 0 if
155  # at least one atom is fixed
156  def allFlexibleAtoms(self, r):
157  ps = r.get_input_particles()
158  for p in ps:
159  if (self.isAtomParticle(p) == 0):
160  continue
161  if ((self.quickParticleName(p) in self.flexibleAtoms) == 0):
162  return 0
163  return 1
164 
165  # Check if we should skip processing of this pair of particles. Don't add restraints between particles that aren't optimized,
166  # and don't add restraints between particles on the same residue (for now
167  # assume stereochemistry is good enough for this)
168  def skipNonBondedPair(self, pair):
169 
170  # skip if same residue
171  if (self.getResidue(pair[0]) == self.getResidue(pair[1])):
172  return 1
173 
174  # skip if neither is flexible
175  if ((self.quickParticleName(pair[0]) in self.flexibleAtoms) == 0 and (self.quickParticleName(pair[1]) in self.flexibleAtoms) == 0):
176  return 1
177  return 0
178 
179  # Set up degrees of freedom for the system. Either we are dealing with a fixed protein or a flexible one.
180  # Determine which atoms are flexible and save them in self.flexibleAtoms
181  def initDof(self):
182 
183  peptideChain = self.getParam("peptide_chain")
184  self.flexibleAtoms = {}
185 
186  if (self.getParam("flexible_protein") == "yes"):
187  # All atoms within range of the peptide are flexible; range defined
188  # by close_pair_distance param
189  particlePairs = self.getClosePairs()
190 
191  for pair in particlePairs:
192  p0 = pair[0]
193  p1 = pair[1]
194  p0Chain = self.getChain(p0)
195  p1Chain = self.getChain(p1)
196  if (p0Chain == peptideChain or p1Chain == peptideChain):
197  self.flexibleAtoms[self.quickParticleName(p0)] = 1
198  self.flexibleAtoms[self.quickParticleName(p1)] = 1
199 
200  else:
201  # only peptide atoms are flexible, determined by the chain id
202  leaves = IMP.atom.get_leaves(self.protein)
203  for leaf in leaves:
204  if (self.getChain(leaf) == peptideChain):
205  self.flexibleAtoms[self.quickParticleName(leaf)] = 1
206  self.fixNonFlexibleAtoms()
207 
208  # return a dictionary where the keys are the names of the fixed atoms in
209  # the system
210  def getFixedAtoms(self):
211  leaves = IMP.atom.get_leaves(self.protein)
212  fixedAtoms = {}
213  for leaf in leaves:
214  leafName = self.quickParticleName(leaf)
215  if (leafName in self.flexibleAtoms) == 0:
216  fixedAtoms[leafName] = 1
217  return fixedAtoms
218 
219  # for each flexible atom, give it the coordinates of another flexible atom (ensures that each atom starts out in the binding site)
220  # Although it does guarantee a very random initial configuration, it is
221  # cheating since we are using knowledge of the native state
222  def initializeRandomExisting(self):
223  leaves = IMP.atom.get_leaves(self.protein)
224  particlesToVary = []
225  xyzs = []
226  for leaf in leaves:
227  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 1):
228  particlesToVary.append(self.quickParticleName(leaf))
229  atomToCoordinates = {}
230  for pName in particlesToVary:
231  randomPname = random.choice(particlesToVary)
232  randomP = self.namesToParticles[randomPname]
233  print "setting coordinates for particle %s to the ones currently set for particle %s" % (pName, randomPname)
234  atomToCoordinates[pName] = IMP.core.XYZ(randomP).get_coordinates()
235 
236  for pName in particlesToVary:
237  particle = self.namesToParticles[pName]
238  xyz = IMP.core.XYZ(particle)
239  xyz.set_coordinates(atomToCoordinates[pName])
240  xyzs.append(xyz)
241  return xyzs
242 
243  # Set initial positions of flexible atoms in the system. We are
244  # experimenting with the best way to do this
245  def setInitialPositions(self):
246  initialPositionMethod = self.getParam("initial_position_method")
247 
248  # set random coordinates for atom by creating bounding box around native coordinates, increasing it
249  # by defined offset plus random number (necessary when running on
250  # cluster)
251  if (initialPositionMethod == "random"):
252 
253  myRandom = random.random()
254  initialOffset = float(
255  self.getParam("initial_atom_offset")) + myRandom
256  for particleName in self.flexibleAtoms.keys():
257  p = self.namesToParticles[particleName]
258  xyzDecorator = IMP.core.XYZ(p)
259  coordinates = xyzDecorator.get_coordinates()
260  bb = IMP.algebra.BoundingBox3D(coordinates)
261  bb += initialOffset
262  bb += myRandom
263 
264  randomXyz = IMP.algebra.get_random_vector_in(bb)
265  xyzDecorator.set_coordinates(randomXyz)
266 
267  # for each flexible atom, give it the coordinates of another flexible
268  # atom (ensures that each atom starts out in the binding site)
269  elif (initialPositionMethod == "random_existing"):
270  print "start random existing"
271  xyzs = self.initializeRandomExisting()
272 
273  # same as random_existing except offset each atom a little (doesn't
274  # seem to make too much of a difference)
275  elif (initialPositionMethod == "random_existing_box"):
276  xyzs = self.initializeRandomExisting()
277  initialOffset = float(self.getParam("initial_atom_offset"))
278  for xyzDecorator in xyzs:
279  coordinates = xyzDecorator.get_coordinates()
280  bb = IMP.algebra.BoundingBox3D(coordinates)
281  bb += initialOffset
282 
283  randomXyz = IMP.algebra.get_random_vector_in(bb)
284  xyzDecorator.set_coordinates(randomXyz)
285 
286  elif (initialPositionMethod == "random_box"):
287 
288  # Get bounding box containing all particles within close pair distance of flexible atoms
289  # This doesn't work perfectly; can put initial positions in the fixed atoms accidentally
290  # As constructed it is specific to peptide atoms only and not
291  # flexible protein atoms
292  bsParticles = []
293 
294  for firstPname in self.nonBondedPairs.keys():
295  secondPnames = self.nonBondedPairs[firstPname]
296  for secondPname in secondPnames.keys():
297  if ((secondPname in self.flexibleAtoms) == 0):
298  fixedAtom = self.namesToParticles[firstPname]
299 
300  bsParticles.append(IMP.core.XYZ(fixedAtom))
301  print "added next close atom %s to bounding box particles" % firstPname
302  # bsBoundingBox = IMP.core.get_bounding_box(bsParticles) -- revert
303  # if I can figure out how
304  peptideParticles = []
305  leaves = IMP.atom.get_leaves(self.protein)
306  for leaf in leaves:
307  if (self.getChain(leaf) == self.getParam("peptide_chain")):
308  peptideParticles.append(IMP.core.XYZ(leaf))
309  bsBoundingBox = IMP.core.get_bounding_box(peptideParticles)
310  # set initial position for each flexible atoms, and adjust it if it
311  # is too close to a fixed residue
312  fixedAtoms = self.getFixedAtoms()
313  for flexPname in self.flexibleAtoms.keys():
314  goodPosition = 0
315  flexParticle = self.namesToParticles[flexPname]
316  flexXyzDecorator = IMP.core.XYZ(flexParticle)
317  print "processing position for pname %s" % flexPname
318  while(goodPosition == 0):
319  goodPosition = 1
320  flexXyzDecorator.set_coordinates(
321  IMP.algebra.get_random_vector_in(bsBoundingBox))
322  for fixedPname in fixedAtoms.keys():
323  fixedParticle = self.namesToParticles[fixedPname]
324  distance = IMP.algebra.get_distance(
325  IMP.core.XYZ(fixedParticle).get_coordinates(),
326  IMP.core.XYZ(flexParticle).get_coordinates())
327  if (distance < 2):
328  print "finding new position for %s" % flexPname
329  goodPosition = 0
330  # start the while loop over which will set new
331  # coordinates
332  break
333 
334  # take a box around the whole protein and randomly initialize atoms
335  # here
336  elif (initialPositionMethod == "random_full"):
337  bb = IMP.atom.get_bounding_box(self.protein)
338  for particleName in self.flexibleAtoms.keys():
339  print "creating random position for particle %s" % particleName
340  p = self.namesToParticles[particleName]
341  xyzDecorator = IMP.core.XYZ(p)
342  randomXyz = IMP.algebra.get_random_vector_in(bb)
343  myRandom = random.random()
344  bb += myRandom
345  xyzDecorator.set_coordinates(randomXyz)
346 
347  # Read in a file generated from a previous run (fastest way is to
348  # create a new model and copy corresponding XYZ Decorators)
349  elif (initialPositionMethod == "file"):
350 
351  initialPositionFile = self.getParam("saved_initial_atom_positions")
352  print "reading initial positions from %s" % initialPositionFile
353  initialModel = IMP.kernel.Model()
354  initialProtein = IMP.atom.read_pdb(
355  initialPositionFile,
356  initialModel,
358  initialLeaves = IMP.atom.get_leaves(initialProtein)
359  for initialLeaf in initialLeaves:
360  name = self.quickParticleName(initialLeaf)
361  if ((name in self.namesToParticles) == 1):
362  existingLeaf = self.namesToParticles[name]
363  existingLeafXyz = IMP.core.XYZ(existingLeaf)
364  initialLeafXyz = IMP.core.XYZ(initialLeaf)
365  existingLeafXyz.set_coordinates(
366  initialLeafXyz.get_coordinates())
367  else:
368  print "Read in initial positions from file %s but this file contained particle %s which is not in current model" % (initialPositionFile, name)
369  else:
370  print "Please specify valid initial position method (random or file)\n"
371  sys.exit()
372 
373  # output initial positions
374  outputDir = self.getParam("output_directory")
375  initialAtomPositionFile = self.getParam(
376  "initial_atom_positions_output_file")
377  fullOutputFile = os.path.join(outputDir, initialAtomPositionFile)
378  print "writing output to file %s" % fullOutputFile
379  IMP.atom.write_pdb(self.protein, fullOutputFile)
380 
381  # get initial score for model
382  initialScore = self.model.evaluate(False)
383  print "initial score for model is %s" % initialScore
384 
385  # Get non-bonded pairs in the system in preparation for restraining them
386  def getNonBondedPairs(self):
387  nonBondedDefinition = self.getParam("non_bonded_definition")
388  particlePairs = []
389  if (nonBondedDefinition == "close_pairs"):
390  particlePairs = self.getClosePairs()
391  elif (nonBondedDefinition == "random"):
392  particlePairs = self.getRandomParticlePairs()
393  elif (nonBondedDefinition == "manual"):
394  particlePairs = self.readManualRestraints(nativeProtein, 6)
395  else:
396  print "Please specify valid non bonded pair definition"
397  sys.exit()
398  return particlePairs
399 
400  # Get 3D distance between the particles in the pair
401  def getXyzDistance(self, pair):
402  d0 = IMP.core.XYZ(pair[0])
403  d1 = IMP.core.XYZ(pair[1])
404  distance = IMP.core.get_distance(d0, d1)
405  return distance
406 
407  # Get all pairs of particles within the distance defined by
408  # close_pair_distance parameter
409  def getClosePairs(self):
410 
411  # Get a list of all atoms in the protein, and put it in a container
412  leaves = IMP.atom.get_leaves(self.protein)
414  closePairDistance = float(self.getParam("close_pair_distance"))
415  # sometimes closePairContainer returns particles > distanceCutoff, even
416  # with slack = 0
417  useHardCutoff = self.getParam("use_hard_distance_cutoff")
418 
419  nbl = IMP.container.ClosePairContainer(cont, closePairDistance, 0.0)
420 
421  self.model.evaluate(False)
422 
423  particlePairs = nbl.get_particle_pairs()
424  finalPairs = []
425  for pair in particlePairs:
426  distance = self.getXyzDistance(pair)
427  q0 = self.quickParticleName(pair[0])
428  q1 = self.quickParticleName(pair[1])
429 
430  if (useHardCutoff == "0" or (useHardCutoff == "1" and distance < closePairDistance)):
431  finalPairs.append([pair[0], pair[1]])
432 
433  return finalPairs
434 
435  # Add restraints for bond lengths, angles, dihedrals, and impropers
436  def addForceFieldRestraints(self):
437 
438  useForcefieldRestraints = self.getParam("use_forcefield_restraints")
439  if (useForcefieldRestraints == "1"):
440 
442  IMP.atom.get_data_path("top_heav.lib"),
443  IMP.atom.get_data_path("par.lib"))
444 
445  # Set up and evaluate the stereochemical part (bonds, angles, dihedrals,
446  # impropers) of the CHARMM forcefield
447  bonds = self.topology.add_bonds(self.protein)
448  angles = ff.create_angles(bonds)
449  dihedrals = ff.create_dihedrals(bonds)
450  impropers = self.topology.add_impropers(self.protein)
451 
452  # tracks which particles are together in forcefield restraints
453  # (useful for filtering excluded volume)
454  self.ffParticleGroups = self.initializeParticleNameDict(
455  IMP.atom.get_leaves(self.protein))
456 
457  self.createForceFieldRestraintsForModel(
458  bonds,
459  "bonds",
460  "Bond Restraints",
463  0,
464  1)))
465  self.createForceFieldRestraintsForModel(
466  angles,
467  "angles",
468  "Angle Restraints",
471  0,
472  1)))
473  self.createForceFieldRestraintsForModel(
474  dihedrals,
475  "dihedrals",
476  "Dihedral Restraints",
478  self.createForceFieldRestraintsForModel(
479  impropers,
480  "impropers",
481  "Improper Restraints",
484  0,
485  1)))
486 
487  else:
488  print ("Skipping forcefield restraints")
489 
490  # Create one type of forcefield restraint. The restraint is initially across the full model and needs to be decomposed
491  # so it can be used in the restraint graph.
492  def createForceFieldRestraintsForModel(
493  self,
494  ffParticles,
495  containerName,
496  restraintName,
497  singletonScore):
498 
499  maxForceFieldScore = float(self.getParam("max_forcefield_score"))
500  cont = IMP.container.ListSingletonContainer(self.model, containerName)
501  cont.add_particles(ffParticles)
502  listBondRestraint = IMP.container.SingletonsRestraint(
503  singletonScore, cont, restraintName)
504  self.model.add_restraint(listBondRestraint)
505 
506  # Restraint decomposition has been a little touchy; a couple
507  # workarounds contained
508  decomposedRestraintTemp = listBondRestraint.create_decomposition()
509  decomposedRestraints = IMP.kernel.RestraintSet.get_from(
510  decomposedRestraintTemp)
511  rs_restraints = decomposedRestraints.get_restraints()
512 
513  # manually remove the full restraint after getting the decomposition
514  self.model.remove_restraint(listBondRestraint)
515 
516  count = decomposedRestraints.get_number_of_restraints()
517  for i in range(count):
518 
519  r = decomposedRestraints.get_restraint(i)
520 
521  if (self.allFlexibleAtoms(r) == 1):
522  # add the decomposed restraint to the model
523  self.model.add_restraint(r)
524  self.model.set_maximum_score(r, maxForceFieldScore)
525  self.addParticlesToGroups(r)
526  else:
527  t = 1
528  # self.model.remove_restraint(r)
529 
530  # Add all atoms in r to ffParticleGroups; tracks which atoms are restrained by forcefield terms so
531  # we don't restrain them in other methods (e.g. excluded volume) --
532  # essentially a manual pair-filter
533  def addParticlesToGroups(self, r):
534  inputParticles = []
535  ps = r.get_input_particles()
536 
537  for p in ps:
538 
539  if (self.isAtomParticle(p) == 0): # skip particles that aren't actually atoms (e.g. bond particles)
540  continue
541  inputParticles.append(p.get_name())
542 
543  for firstName in inputParticles:
544  for secondName in inputParticles:
545  if (firstName == secondName):
546  continue
547  self.ffParticleGroups[firstName][secondName] = 1
548 
549  # Add excluded volume restraints across close pairs. Still a work in progress as they have been slowing down
550  # the code quite a bit and we'll probably want to turn them on only at a
551  # defined time
552  def addExcludedVolumeRestraints(self):
553 
554  restrainedParticles = atomicDominoUtilities.getRestrainedParticles(
555  self.protein, self.model, self.namesToParticles)
556  lsc = IMP.container.ListSingletonContainer(restrainedParticles)
557 
558  evr = IMP.core.ExcludedVolumeRestraint(lsc, 1, 1)
559  self.model.add_restraint(evr)
560 
561  # Add lennard jones restraints between flexible atoms if they haven't been
562  # added already, and then scale radii of flexible atoms by input scaling
563  # factor
564  def addFlexFlexLjRestraints(self, scalingFactor):
565  if (self.initializedFlexFlexLjRestraints == 0):
566  print "adding initial lennard jones restraints between pairs of flexible atoms"
567  self.initializedFlexFlexLjRestraints = 1
568  leaves = IMP.atom.get_leaves(self.protein)
569  counter = 0
570 
571  sf = IMP.atom.ForceSwitch(6.0, 7.0)
573  for i in range(len(leaves)):
574  iLeaf = leaves[i]
575  for j in range(i + 1, len(leaves)):
576  jLeaf = leaves[j]
577  if ((self.quickParticleName(iLeaf) in self.flexibleAtoms) == 1 and (self.quickParticleName(jLeaf) in self.flexibleAtoms) == 1):
578  ljpr = IMP.core.PairRestraint(
579  ps, [iLeaf, jLeaf], "LennardJones %s_%s" %
580  (i, j))
581  self.model.add_restraint(ljpr)
582  chainHierarchies = IMP.atom.get_by_type(
583  self.protein, IMP.atom.CHAIN_TYPE)
584  peptideChainHierarchy = None
585  print "scaling atomic radii by scaling factor %s" % scalingFactor
586  # reset radii back to original in preparation for rescaling
587  self.ff.add_radii(self.protein)
588  for pName in self.flexibleAtoms.keys():
589  particle = self.namesToParticles[pName]
590  xyzr = IMP.core.XYZR(particle)
591  radius = xyzr.get_radius()
592  radius *= scalingFactor
593  xyzr.set_radius(radius)
594 
595  # add lennard-jones restraints between all fixed atoms and all flexible
596  # atoms
597  def addFixedFlexibleLjRestraints(self):
598  print "adding initial fixed flexible lj restraints"
599  fixedAtoms = self.getFixedAtoms()
600  sf = IMP.atom.ForceSwitch(6.0, 7.0)
602 
603  for flexPname in self.flexibleAtoms.keys():
604  flexParticle = self.namesToParticles[flexPname]
605  for fixedPname in fixedAtoms.keys():
606  fixedParticle = self.namesToParticles[fixedPname]
607  ljpr = IMP.core.PairRestraint(
608  ps, [fixedParticle, flexParticle], "LennardJones %s_%s" %
609  (flexPname, fixedPname))
610  self.model.add_restraint(ljpr)
611 
612  # actually do the work to create the DOPE restraint
613  def createDopeRestraint(self, pair):
614  q0 = self.quickParticleName(pair[0])
615  q1 = self.quickParticleName(pair[1])
616  # create restraint around this distance, add to model
617  prName = "NonBonded %s_%s" % (q0, q1)
618  pairRestraint = IMP.core.PairRestraint(
619  self.dopePairScore, pair, prName)
620 
621  # print "Added non-bonded restraint between %s and %s" % (q0, q1)
622  self.model.add_restraint(pairRestraint)
623  maxNonBondedScore = float(self.getParam("max_non_bonded_score"))
624  self.model.set_maximum_score(pairRestraint, maxNonBondedScore)
625  #initialScore = pairRestraint.evaluate(0)
626 
627  # add non-bonded restraints across close pairs. At least one atom in each
628  # pair should be flexible
629  def addClosePairNonBondedRestraints(self):
630  particlePairs = self.getNonBondedPairs()
631 
632  nonBondedRestraintType = self.getParam("non_bonded_restraint_type")
633  dopeCutoff = float(self.getParam("dope_cutoff"))
634 
636 
637  ps = 0
638  if (nonBondedRestraintType == "dope"):
639  IMP.atom.add_dope_score_data(self.protein)
640  ps = IMP.atom.DopePairScore(dopeCutoff)
641  self.dopePairScore = ps
642  nonBondedCount = 0
643  self.nonBondedPairs = self.initializeParticleNameDict(
644  IMP.atom.get_leaves(self.protein))
645  for pair in particlePairs:
646 
647  if (self.skipNonBondedPair(pair) == 1):
648  continue
649 
650  self.createDopeRestraint(pair)
651 
652  nonBondedCount += 1
653  self.nonBondedPairs[self.quickParticleName(pair[0])][
654  self.quickParticleName(pair[0])] = 1
655  self.nonBondedPairs[self.quickParticleName(pair[1])][
656  self.quickParticleName(pair[1])] = 1
657 
658  print "Added %s non-bonded restraints" % nonBondedCount
659  self.model.evaluate(False)
660 
661  def addCompleteNonBondedRestraints(self):
662  leaves = IMP.atom.get_leaves(self.protein)
663  counter = 0
664 
665  for i in range(len(leaves)):
666  iLeaf = leaves[i]
667  for j in range(i + 1, len(leaves)):
668  jLeaf = leaves[j]
669 
670  if ((self.quickParticleName(jLeaf) in self.nonBondedPairs[self.quickParticleName(iLeaf)]) == 1):
671  continue
672  if (self.skipNonBondedPair([iLeaf, jLeaf]) == 1):
673  continue
674 
675  self.createDopeRestraint([iLeaf, jLeaf])
676  counter += 1
677  # print "added complete restraint between %s and %s" %
678  # (self.quickParticleName(iLeaf),
679  # self.quickParticleName(jLeaf))
680  print "added %s restraints for %s particles" % (counter, len(leaves))
681  self.model.evaluate(False)
682 
683  # lots of output on all restraints
684  def writeAllRestraints(self):
685  print "write all restraints:"
686  for r in IMP.get_restraints([self.model.get_root_restraint_set()]):
687  print self.quickRestraintName(r)
688 
689  # Read parameterss with md temperature / step schedule and save them
690  def initializeMdSchedule(self):
691  schedule = self.getParam("md_schedule")
692  stages = schedule.split()
693  totalSteps = 0
694  for stage in stages:
695  print "unpacking stage %s" % stage
696  [steps, temperature, excludedVolume] = stage.split('_')
697  totalSteps += int(steps)
698  self.totalMdSteps = totalSteps
699  self.mdStages = stages
700 
701  # Creates roothandle for hdf5 objects interacting with md trajectory
702  # Either we create a new trajectory in this file and read it back in the
703  # same run, or we are reading a saved trajectory
704  def createMdRootHandle(self):
705 
706  mdFileName = ""
707  outputDir = self.getParam("output_directory")
708 
709  if (self.getParam("read_assignments_from_file") == "no"):
710  # create new hdf5 file to which md output will be written
711  mdFileName = self.getParam("md_trajectory_output_file")
712  fileName = os.path.join(outputDir, mdFileName)
713  print "creating rmf file with filename %s" % fileName
714  rh = RMF.create_rmf_file(fileName)
715  my_kc = rh.add_category("my data")
716  IMP.rmf.add_hierarchy(rh, self.protein)
717 
718  self.rootHandle = rh
719  else:
720  # use existing hdf5 file
721  mdFileName = self.getParam("saved_md_filename")
722 
723  rh = RMF.open_rmf_file(mdFileName)
724  IMP.rmf.set_hierarchies(rh, [self.protein])
725 
726  self.rootHandle = rh
727 
728  def evaluateModel(self):
729  return self.model.evaluate(False)
730 
731  def loadMdHelpers(self):
732 
733  self.createMdRootHandle()
734 
735  self.createMdOptimizer()
736 
737  self.addMdOptimizerStates()
738 
739  self.initializeMdSchedule()
740 
741  self.createdMdOptimizer = 1
742 
743  # Create the optimizer object for running MD
744  def createMdOptimizer(self):
745  print "creating md object"
746  md = IMP.atom.MolecularDynamics(self.model)
747  # hack to get around bug where vx attributes in non-optimized particles
748  # are not being set
749  md.optimize(1)
750  md.set_velocity_cap(100.0)
751  self.mdOptimizer = md
752 
753  def addMdOptimizerStates(self):
754 
755  # VelocityScalingOptimizerState
757  IMP.atom.get_leaves(self.protein),
758  0) # temperature is set later
759  vsIndex = self.mdOptimizer.add_optimizer_state(vsos)
760 
761  # SaveHierarchyConfigurationOptimizerState
762  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState(
763  [self.protein], self.rootHandle)
764  self.mdOptimizer.add_optimizer_state(hdos)
765 
766  self.hdos = hdos
767  self.vsos = vsos
768 
769  def readTrajectories(self):
770 
771  cgFileName = self.getParam("cg_output_file")
772  bestCgScore = 10000000
773  bestCgScoreFile = ""
774  bestCgRmsd = 10000000
775  bestCgRmsdFile = ""
776 
777  outputDir = self.getParam("output_directory")
778  trajectoryFile = self.getParam("md_trajectory_output_file")
779  fullFile = os.path.join(outputDir, trajectoryFile)
780  rh = RMF.open_rmf_file(fullFile)
781  IMP.rmf.set_hierarchies(rh, [self.protein])
782  framesToRead = atomicDominoUtilities.getMdIntervalFrames(
783  rh, int(self.getParam("cg_interval")), self.protein)
784 
785  if (len(framesToRead) > 0):
786  for cgNumber in framesToRead:
787  # Open next cg trajectory
788  outputDir = self.getParam("output_directory")
789  fullCgFileName = os.path.join(
790  outputDir, "%s%s" %
791  (cgFileName, cgNumber))
792  rh = RMF.open_rmf_file(fullCgFileName)
793  IMP.rmf.set_hierarchies(rh, [self.protein])
794 
795  frameCount = IMP.rmf.get_number_of_frames(rh, self.protein)
796  IMP.rmf.load_frame(rh, frameCount - 1, self.protein)
797  score = self.model.evaluate(False)
798  rmsd = self.calculateNativeRmsd(self.flexibleAtoms)
799  print "cg number %s has score %s rmsd %s" % (cgNumber, score, rmsd)
800  if (score < bestCgScore):
801  bestCgScore = score
802  bestCgScoreFile = fullCgFileName
803  if (rmsd < bestCgRmsd):
804  bestCgRmsd = rmsd
805  bestCgRmsdFile = fullCgFileName
806 
807  # output best score information
808  self.singlePdbResults(
809  bestCgScoreFile,
810  -1,
811  self.getParam("best_cg_score_output_file"))
812  self.singlePdbResults(
813  bestCgRmsdFile,
814  -1,
815  self.getParam("best_cg_rmsd_output_file"))
816  self.singlePdbResults(
817  "%s%s" %
818  (cgFileName, framesToRead[-1]), -1, self.getParam("final_cg_frame_output_file"))
819  finalCgRmsd = self.calculateNativeRmsd(self.flexibleAtoms)
820  self.bestCgScore = bestCgScore
821  self.bestCgRmsd = bestCgRmsd
822  self.finalCgRmsd = finalCgRmsd
823 
824  def singlePdbResults(self, trajectoryFile, frame, outputPdbFile):
825 
826  fullTrajectoryFile = os.path.join(
827  self.getParam("output_directory"),
828  trajectoryFile)
829  fullOutputFile = os.path.join(
830  self.getParam("output_directory"),
831  outputPdbFile)
832  rh = RMF.open_rmf_file(fullTrajectoryFile)
833  IMP.rmf.set_hierarchies(rh, [self.protein])
834  if (frame == -1):
835  frame = IMP.rmf.get_number_of_frames(rh, self.protein) - 1
836  IMP.rmf.load_frame(rh, frame, self.protein)
837  IMP.atom.write_pdb(self.protein, fullOutputFile)
838 
839  def calculateRmsd(self, otherProtein, flexibleAtoms):
840  otherNamesToParticles = atomicDominoUtilities.makeNamesToParticles(
841  otherProtein)
842  otherVector = []
843  modelVector = []
844  for pName in otherNamesToParticles.keys():
845  if ((pName in flexibleAtoms) == 0):
846  continue
847  otherParticle = otherNamesToParticles[pName]
848  modelParticle = self.namesToParticles[pName]
849  otherVector.append(
850  IMP.core.XYZ(otherParticle).get_coordinates())
851  modelVector.append(
852  IMP.core.XYZ(modelParticle).get_coordinates())
853  rmsd = IMP.atom.get_rmsd(otherVector, modelVector)
854  return rmsd
855 
856  def calculateNativeRmsd(self, flexibleAtoms):
857 
858  if (self.wroteNativeProtein == 0):
859  pdbName = self.getParam("native_pdb_input_file")
860  self.nativeModel = IMP.kernel.Model()
861  self.nativeProtein = IMP.atom.read_pdb(
862  pdbName,
863  self.nativeModel,
865  self.wroteNativeProtein = 1
866 
867  return self.calculateRmsd(self.nativeProtein, flexibleAtoms)
868 
869  # Run MD according to the temperature schedule read in from parameters
870  def runMolecularDynamics(self):
871  self.initializedFlexFlexLjRestraints = 0
872  if (self.fixedNonFlexibleAtoms == 0):
873  print "error: before running md, must fix non flexible atoms"
874  sys.exit()
875  stepsSoFar = 0
876  for stage in self.mdStages:
877  [steps, temperature, excludedVolume] = stage.split('_')
878  stepsSoFar += int(steps)
879  self.vsos.set_temperature(int(temperature))
880  if (excludedVolume == "1"): # add lennard jones restraints between flexible atoms and fixed atoms (don't add between flex atoms)
881  self.addFixedFlexibleLjRestraints()
882  elif (excludedVolume != "0"):
883  # add lennard jones restraints between flexible atoms
884  if (self.getParam("excluded_volume_type") == "lj"):
885  self.addFlexFlexLjRestraints(float(excludedVolume))
886  elif (self.getParam("excluded_volume_type") == "ev"):
887  self.addExcludedVolumeRestraints(
888  ) # add full excluded volume restraints (just take everything in the protein)
889  else:
890  "please set excluded_volume_type to either lj or ev"
891  sys.exit()
892  print "running md at temperature %s for %s steps" % (temperature, steps)
893  self.mdOptimizer.optimize(int(steps))
894 
896  self.protein,
897  os.path.join(
898  self.getParam(
899  "output_directory"),
900  "md_after_%s.pdb" %
901  stepsSoFar))
902  print "model score after stage %s is %s" % (stage, self.model.evaluate(False))
903  print "done running md"
904 
905  def runAllCg(self):
906  cgInterval = int(self.getParam("cg_interval"))
907  outputDir = self.getParam("output_directory")
908  trajectoryFile = self.getParam("md_trajectory_output_file")
909  fullFile = os.path.join(outputDir, trajectoryFile)
910  print "open rmf %s" % fullFile
911  rh = RMF.open_rmf_file(fullFile)
912  IMP.rmf.set_hierarchies(rh, [self.protein])
913  framesToRead = atomicDominoUtilities.getMdIntervalFrames(
914  rh, cgInterval, self.protein)
915 
916  for i in framesToRead:
917  cgFileName = "%s%s" % (self.getParam("cg_output_file"), i)
918  self.applyCg(i, cgFileName)
919 
920  # Apply CG to each step of the saved MD trajectory. Writes results to same
921  # file storing the MD trajectory
922  def applyCg(self, mdStepStart, cgFileName):
923 
924  cgSteps = int(self.getParam("cg_steps"))
925 
926  # load md step specified by calling object
927  print "apply cg: loading md frame for cg step start %s" % mdStepStart
928  IMP.rmf.load_frame(self.rootHandle, mdStepStart, self.protein)
929 
930  # create new hdf5 file to which cg output will be written
931  outputDir = self.getParam("output_directory")
932  fileName = os.path.join(outputDir, cgFileName)
933  print "creating cg hdf5 file %s" % fileName
934  rh = RMF.create_rmf_file(fileName)
935  my_kc = rh.add_category("my data")
936  IMP.rmf.add_hierarchy(rh, self.protein)
937 
938  # apply cg
939  cg = IMP.core.ConjugateGradients(self.model)
940  firstScore = self.model.evaluate(False)
941 
942  print "creating optimizer state"
943  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState(
944  [self.protein], rh)
945  hdos.set_skip_steps(0)
946  # hdos is the optimizer state writing configurations to disk
947  cg.add_optimizer_state(hdos)
948 
949  print "running cg"
950  cg.optimize(cgSteps)
951  secondScore = self.model.evaluate(False)
952  print "cg score after md step %s before %s after %s" % (mdStepStart, firstScore, secondScore)
953  return secondScore
954 
955  # Tell the optimizer not to move atoms we have determined ar fixed
956  def fixNonFlexibleAtoms(self):
957  if (self.createdMdOptimizer == 0):
958  self.createMdOptimizer()
959  # print "ERROR: must create md optimizer and optimize once before set_particles_are_optimized is called"
960  # sys.exit()
961 
962  self.fixedNonFlexibleAtoms = 1
963  leaves = IMP.atom.get_leaves(self.protein)
964  for leaf in leaves:
965  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 0):
966  xyzDecorator = IMP.core.XYZ(leaf)
967  xyzDecorator.set_coordinates_are_optimized(0)
968 
969  def writeOutput(self):
970  restraintCount = self.model.get_number_of_restraints()
971  leaves = IMP.atom.get_leaves(self.protein)
972  flexiblePeptideAtoms = 0
973  flexibleProteinAtoms = 0
974  fixedPeptideAtoms = 0
975  fixedProteinAtoms = 0
976 
977  for leaf in leaves:
978  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 0):
979  if (self.getChain(leaf) == self.getParam("peptide_chain")):
980  fixedPeptideAtoms += 1
981  else:
982  fixedProteinAtoms += 1
983  else:
984  if (self.getChain(leaf) == self.getParam("peptide_chain")):
985  flexiblePeptideAtoms += 1
986  else:
987  flexibleProteinAtoms += 1
988  print "Flexible peptide atoms: %s" % flexiblePeptideAtoms
989  print "Fixed peptide atoms: %s" % fixedPeptideAtoms
990  print "Flexible protein atoms: %s" % flexibleProteinAtoms
991  print "Fixed protein atoms: %s" % fixedProteinAtoms
992  print "Total number of restraints: %s" % restraintCount
993 
994  def writeOsOutput(self):
995 
996  print "Best cg score: %s" % self.bestCgScore
997  print "best cg rmsd: %s" % self.bestCgRmsd
998  print "final cg rmsd: %s" % self.finalCgRmsd
Applies a SingletonScore to each Singleton in a list.
Chain get_chain(Hierarchy h)
void write_pdb(const Selection &mhd, base::TextOutput out, unsigned int model=1)
Score the angle based on a UnaryFunction,.
Simple conjugate gradients optimizer.
algebra::BoundingBoxD< 3 > get_bounding_box(const Hierarchy &h)
Get a bounding box for the Hierarchy.
void load_frame(RMF::FileConstHandle file, unsigned int frame)
Definition: frames.h:27
Hierarchy get_residue(Hierarchy mhd, unsigned int index)
Get the residue with the specified index.
Return all close unordered pairs of particles taken from the SingletonContainer.
double get_rmsd(const core::XYZs &s0, const core::XYZs &s1, const IMP::algebra::Transformation3D &tr_for_second)
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:87
Vector3D get_random_vector_in(const Cylinder3D &c)
Generate a random vector in a cylinder with uniform density.
CHARMM force field parameters.
static Float get_k_from_standard_deviation(Float sd, Float t=297.15)
Return the k to use for a given Gaussian standard deviation.
Definition: core/Harmonic.h:62
Maintains temperature during molecular dynamics by velocity scaling.
Select all non-alternative ATOM records.
Definition: pdb.h:63
void add_hierarchy(RMF::FileHandle fh, atom::Hierarchy hs)
Lennard-Jones score between a pair of particles.
Simple molecular dynamics optimizer.
Store a kernel::ParticleIndexes.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
algebra::BoundingBoxD< 3 > get_bounding_box(const XYZRs &ps)
Get the bounding box.
Score the improper dihedral based on a UnaryFunction,.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Score the bond based on a UnaryFunction,.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Prevent a set of particles and rigid bodies from inter-penetrating.
void add_dope_score_data(atom::Hierarchy h)
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
Definition: VectorD.h:209
Applies a PairScore to a Pair.
Definition: PairRestraint.h:29
std::string get_data_path(std::string file_name)
Return the full path to installed data.
Smooth interaction scores by switching the derivatives (force switch).
void read_pdb(base::TextInput input, int model, Hierarchy h)
Hierarchies get_leaves(const Selection &h)
Support for the RMF file format for storing hierarchical molecular data and markup.
Divide-and-conquer inferential optimization in discrete space.
Class for storing model, its restraints, constraints, and particles.
Definition: kernel/Model.h:73
Harmonic function (symmetric about the mean)
Definition: core/Harmonic.h:24
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27