IMP logo
IMP Reference Guide  2.20.0
The Integrative Modeling Platform
peptideDocker.py
1 from __future__ import print_function
2 import IMP
3 import subprocess
4 import random
5 import IMP.domino
6 import IMP.core
7 import IMP.rmf
8 import RMF
9 
10 import time
11 import IMP.algebra
12 import sys
13 import os
14 from . import atomicDominoUtilities
15 
16 
17 class PeptideDocker:
18 
19  def __init__(self, parameterFileName):
20  self.createdMdOptimizer = 0
21  self.fixedNonFlexibleAtoms = 0
22  self.readParameters(parameterFileName)
23  self.wroteNativeProtein = 0
24 
25  def loadHelpers(self):
26  self.loadDockingHelpers()
27  self.loadMdHelpers()
28 
29  def readParameters(self, parameterFileName):
30  self.parameters = atomicDominoUtilities.readParameters(
31  parameterFileName)
32 
33  # Processing specific to peptide docking and this script
34  def loadDockingHelpers(self):
35  mkdirProcess = subprocess.Popen(
36  ['mkdir',
37  '-p',
38  self.getParam('output_directory')],
39  shell=False,
40  stderr=subprocess.PIPE)
41  _ = mkdirProcess.communicate()
42 
43  self.namesToParticles = atomicDominoUtilities.makeNamesToParticles(
44  self.protein)
45 
46  self.times = []
47  self.rawTimes = []
48 
49  # Helper method; create dictionary where the keys are names of all
50  # particles in the system and the values are themselves dictionaries
51  def initializeParticleNameDict(self, leaves):
52  particleDict = {}
53 
54  for leaf in leaves:
55  name = self.quickParticleName(leaf)
56  particleDict[name] = {}
57  return particleDict
58 
59  def getModel(self):
60  return self.model
61 
62  def getProtein(self):
63  return self.protein
64 
65  def createModel(self):
66 
67  pdbName = self.getParam("native_pdb_input_file")
68 
69  self.model = IMP.Model()
70 
71  self.protein = IMP.atom.read_pdb(
72  pdbName,
73  self.model,
75 
76  # atom radii, forcefield topology, etc. -- no restraints created here
77 
79  IMP.atom.get_data_path("par.lib"))
80 
81  topology = ff.create_topology(self.protein)
82  topology.apply_default_patches()
83 
84  topology.add_atom_types(self.protein)
85 
86  ff.add_radii(self.protein)
87  ff.add_well_depths(self.protein)
88  self.ff = ff
89  self.topology = topology
90 
91  # Get parameter value. Throws dictionary exception if not found
92  def getParam(self, paramName):
93  paramValue = self.parameters[paramName]
94  return paramValue
95 
96  # Peptide-docking friendly way to log which restraint we're dealing with
97  def quickRestraintName(self, r):
98  nameList = []
99  ps = r.get_input_particles()
100 
101  for p in ps:
102  if (self.isAtomParticle(p) == 0):
103  continue
104  isFlexible = 0
105  if ((self.quickParticleName(p) in self.flexibleAtoms) == 1):
106  isFlexible = 1
107  nameList.append("%s_%s" % (self.quickParticleName(p), isFlexible))
108 
109  return "%s acting on %s" % (r.get_name(), " , ".join(nameList))
110 
111  def quickParticleName(self, particle):
112  return atomicDominoUtilities.quickParticleName(particle)
113 
114  # output current time
115  def logTime(self, label):
116  nextTime = time.asctime(time.localtime(time.time()))
117  print("%s:\t %s" % (label, nextTime))
118  nextTimeEntry = [label, nextTime]
119  self.times.append(nextTimeEntry)
120  self.rawTimes.append(time.time())
121 
122  # output all times that were saved by logTime()
123  def outputTimes(self):
124  print("Time for each step:")
125  for timeEntry in self.times:
126  label = timeEntry[0]
127  value = timeEntry[1]
128  print("%s:\t%s" % (label, value))
129 
130  def getFlexibleAtoms(self):
131  return self.flexibleAtoms
132 
133  # Helper method to quickly get the chain identifier for a particle
134  def getChain(self, particle):
135  atomDecorator = IMP.atom.Atom(particle)
136  residue = IMP.atom.get_residue(atomDecorator)
137  chain = IMP.atom.get_chain(residue)
138  chainId = chain.get_id()
139  return chainId
140 
141  # Helper method to quickly get the residue index for a particle
142  def getResidue(self, particle):
143  atomDecorator = IMP.atom.Atom(particle)
144  residue = IMP.atom.get_residue(atomDecorator)
145  residueNumber = residue.get_index()
146  return residueNumber
147 
148  def isAtomParticle(self, p):
149  return atomicDominoUtilities.isAtomParticle(p)
150 
151  # Read all particles in restraint; return 1 if all are flexible and 0 if
152  # at least one atom is fixed
153  def allFlexibleAtoms(self, r):
154  ps = r.get_input_particles()
155  for p in ps:
156  if (self.isAtomParticle(p) == 0):
157  continue
158  if ((self.quickParticleName(p) in self.flexibleAtoms) == 0):
159  return 0
160  return 1
161 
162  # Check if we should skip processing of this pair of particles. Don't add
163  # restraints between particles that aren't optimized,
164  # and don't add restraints between particles on the same residue (for now
165  # assume stereochemistry is good enough for this)
166  def skipNonBondedPair(self, pair):
167 
168  # skip if same residue
169  if (self.getResidue(pair[0]) == self.getResidue(pair[1])):
170  return 1
171 
172  # skip if neither is flexible
173  if ((self.quickParticleName(pair[0]) in self.flexibleAtoms) == 0
174  and (self.quickParticleName(pair[1])
175  in self.flexibleAtoms) == 0):
176  return 1
177  return 0
178 
179  # Set up degrees of freedom for the system. Either we are dealing with
180  # a fixed protein or a flexible one.
181  # Determine which atoms are flexible and save them in self.flexibleAtoms
182  def initDof(self):
183 
184  peptideChain = self.getParam("peptide_chain")
185  self.flexibleAtoms = {}
186 
187  if (self.getParam("flexible_protein") == "yes"):
188  # All atoms within range of the peptide are flexible; range defined
189  # by close_pair_distance param
190  particlePairs = self.getClosePairs()
191 
192  for pair in particlePairs:
193  p0 = pair[0]
194  p1 = pair[1]
195  p0Chain = self.getChain(p0)
196  p1Chain = self.getChain(p1)
197  if (p0Chain == peptideChain or p1Chain == peptideChain):
198  self.flexibleAtoms[self.quickParticleName(p0)] = 1
199  self.flexibleAtoms[self.quickParticleName(p1)] = 1
200 
201  else:
202  # only peptide atoms are flexible, determined by the chain id
203  leaves = IMP.atom.get_leaves(self.protein)
204  for leaf in leaves:
205  if (self.getChain(leaf) == peptideChain):
206  self.flexibleAtoms[self.quickParticleName(leaf)] = 1
207  self.fixNonFlexibleAtoms()
208 
209  # return a dictionary where the keys are the names of the fixed atoms in
210  # the system
211  def getFixedAtoms(self):
212  leaves = IMP.atom.get_leaves(self.protein)
213  fixedAtoms = {}
214  for leaf in leaves:
215  leafName = self.quickParticleName(leaf)
216  if (leafName in self.flexibleAtoms) == 0:
217  fixedAtoms[leafName] = 1
218  return fixedAtoms
219 
220  # for each flexible atom, give it the coordinates of another flexible
221  # atom (ensures that each atom starts out in the binding site)
222  # Although it does guarantee a very random initial configuration, it is
223  # cheating since we are using knowledge of the native state
224  def initializeRandomExisting(self):
225  leaves = IMP.atom.get_leaves(self.protein)
226  particlesToVary = []
227  xyzs = []
228  for leaf in leaves:
229  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 1):
230  particlesToVary.append(self.quickParticleName(leaf))
231  atomToCoordinates = {}
232  for pName in particlesToVary:
233  randomPname = random.choice(particlesToVary)
234  randomP = self.namesToParticles[randomPname]
235  print("setting coordinates for particle %s to the ones currently "
236  "set for particle %s" % (pName, randomPname))
237  atomToCoordinates[pName] = IMP.core.XYZ(randomP).get_coordinates()
238 
239  for pName in particlesToVary:
240  particle = self.namesToParticles[pName]
241  xyz = IMP.core.XYZ(particle)
242  xyz.set_coordinates(atomToCoordinates[pName])
243  xyzs.append(xyz)
244  return xyzs
245 
246  # Set initial positions of flexible atoms in the system. We are
247  # experimenting with the best way to do this
248  def setInitialPositions(self):
249  initialPositionMethod = self.getParam("initial_position_method")
250 
251  # set random coordinates for atom by creating bounding box around
252  # native coordinates, increasing it by defined offset plus random
253  # number (necessary when running on cluster)
254  if (initialPositionMethod == "random"):
255 
256  myRandom = random.random()
257  initialOffset = float(
258  self.getParam("initial_atom_offset")) + myRandom
259  for particleName in self.flexibleAtoms.keys():
260  p = self.namesToParticles[particleName]
261  xyzDecorator = IMP.core.XYZ(p)
262  coordinates = xyzDecorator.get_coordinates()
263  bb = IMP.algebra.BoundingBox3D(coordinates)
264  bb += initialOffset
265  bb += myRandom
266 
267  randomXyz = IMP.algebra.get_random_vector_in(bb)
268  xyzDecorator.set_coordinates(randomXyz)
269 
270  # for each flexible atom, give it the coordinates of another flexible
271  # atom (ensures that each atom starts out in the binding site)
272  elif (initialPositionMethod == "random_existing"):
273  print("start random existing")
274  xyzs = self.initializeRandomExisting()
275 
276  # same as random_existing except offset each atom a little (doesn't
277  # seem to make too much of a difference)
278  elif (initialPositionMethod == "random_existing_box"):
279  xyzs = self.initializeRandomExisting()
280  initialOffset = float(self.getParam("initial_atom_offset"))
281  for xyzDecorator in xyzs:
282  coordinates = xyzDecorator.get_coordinates()
283  bb = IMP.algebra.BoundingBox3D(coordinates)
284  bb += initialOffset
285 
286  randomXyz = IMP.algebra.get_random_vector_in(bb)
287  xyzDecorator.set_coordinates(randomXyz)
288 
289  elif (initialPositionMethod == "random_box"):
290 
291  # Get bounding box containing all particles within close pair
292  # distance of flexible atoms
293  # This doesn't work perfectly; can put initial positions in the
294  # fixed atoms accidentally
295  # As constructed it is specific to peptide atoms only and not
296  # flexible protein atoms
297  bsParticles = []
298 
299  for firstPname in self.nonBondedPairs.keys():
300  secondPnames = self.nonBondedPairs[firstPname]
301  for secondPname in secondPnames.keys():
302  if ((secondPname in self.flexibleAtoms) == 0):
303  fixedAtom = self.namesToParticles[firstPname]
304 
305  bsParticles.append(IMP.core.XYZ(fixedAtom))
306  print("added next close atom %s to bounding box "
307  "particles" % firstPname)
308  # bsBoundingBox = IMP.core.get_bounding_box(bsParticles) -- revert
309  # if I can figure out how
310  peptideParticles = []
311  leaves = IMP.atom.get_leaves(self.protein)
312  for leaf in leaves:
313  if (self.getChain(leaf) == self.getParam("peptide_chain")):
314  peptideParticles.append(IMP.core.XYZ(leaf))
315  bsBoundingBox = IMP.core.get_bounding_box(peptideParticles)
316  # set initial position for each flexible atoms, and adjust it if it
317  # is too close to a fixed residue
318  fixedAtoms = self.getFixedAtoms()
319  for flexPname in self.flexibleAtoms.keys():
320  goodPosition = 0
321  flexParticle = self.namesToParticles[flexPname]
322  flexXyzDecorator = IMP.core.XYZ(flexParticle)
323  print("processing position for pname %s" % flexPname)
324  while goodPosition == 0:
325  goodPosition = 1
326  flexXyzDecorator.set_coordinates(
327  IMP.algebra.get_random_vector_in(bsBoundingBox))
328  for fixedPname in fixedAtoms.keys():
329  fixedParticle = self.namesToParticles[fixedPname]
330  distance = IMP.algebra.get_distance(
331  IMP.core.XYZ(fixedParticle).get_coordinates(),
332  IMP.core.XYZ(flexParticle).get_coordinates())
333  if (distance < 2):
334  print("finding new position for %s" % flexPname)
335  goodPosition = 0
336  # start the while loop over which will set new
337  # coordinates
338  break
339 
340  # take a box around the whole protein and randomly initialize atoms
341  # here
342  elif (initialPositionMethod == "random_full"):
343  bb = IMP.atom.get_bounding_box(self.protein)
344  for particleName in self.flexibleAtoms.keys():
345  print("creating random position for particle %s"
346  % particleName)
347  p = self.namesToParticles[particleName]
348  xyzDecorator = IMP.core.XYZ(p)
349  randomXyz = IMP.algebra.get_random_vector_in(bb)
350  myRandom = random.random()
351  bb += myRandom
352  xyzDecorator.set_coordinates(randomXyz)
353 
354  # Read in a file generated from a previous run (fastest way is to
355  # create a new model and copy corresponding XYZ Decorators)
356  elif (initialPositionMethod == "file"):
357 
358  initialPositionFile = self.getParam("saved_initial_atom_positions")
359  print("reading initial positions from %s" % initialPositionFile)
360  initialModel = IMP.Model()
361  initialProtein = IMP.atom.read_pdb(
362  initialPositionFile,
363  initialModel,
365  initialLeaves = IMP.atom.get_leaves(initialProtein)
366  for initialLeaf in initialLeaves:
367  name = self.quickParticleName(initialLeaf)
368  if ((name in self.namesToParticles) == 1):
369  existingLeaf = self.namesToParticles[name]
370  existingLeafXyz = IMP.core.XYZ(existingLeaf)
371  initialLeafXyz = IMP.core.XYZ(initialLeaf)
372  existingLeafXyz.set_coordinates(
373  initialLeafXyz.get_coordinates())
374  else:
375  print("Read in initial positions from file %s but this "
376  "file contained particle %s which is not in current "
377  "model" % (initialPositionFile, name))
378  else:
379  print("Please specify valid initial position method "
380  "(random or file)\n")
381  sys.exit()
382 
383  # output initial positions
384  outputDir = self.getParam("output_directory")
385  initialAtomPositionFile = self.getParam(
386  "initial_atom_positions_output_file")
387  fullOutputFile = os.path.join(outputDir, initialAtomPositionFile)
388  print("writing output to file %s" % fullOutputFile)
389  IMP.atom.write_pdb(self.protein, fullOutputFile)
390 
391  # get initial score for model
392  initialScore = self.model.evaluate(False)
393  print("initial score for model is %s" % initialScore)
394 
395  # Get non-bonded pairs in the system in preparation for restraining them
396  def getNonBondedPairs(self):
397  nonBondedDefinition = self.getParam("non_bonded_definition")
398  particlePairs = []
399  if (nonBondedDefinition == "close_pairs"):
400  particlePairs = self.getClosePairs()
401  elif (nonBondedDefinition == "random"):
402  particlePairs = self.getRandomParticlePairs()
403 # elif (nonBondedDefinition == "manual"):
404 # particlePairs = self.readManualRestraints(nativeProtein, 6)
405  else:
406  print("Please specify valid non bonded pair definition")
407  sys.exit()
408  return particlePairs
409 
410  # Get 3D distance between the particles in the pair
411  def getXyzDistance(self, pair):
412  d0 = IMP.core.XYZ(pair[0])
413  d1 = IMP.core.XYZ(pair[1])
414  distance = IMP.core.get_distance(d0, d1)
415  return distance
416 
417  # Get all pairs of particles within the distance defined by
418  # close_pair_distance parameter
419  def getClosePairs(self):
420 
421  # Get a list of all atoms in the protein, and put it in a container
422  leaves = IMP.atom.get_leaves(self.protein)
424  closePairDistance = float(self.getParam("close_pair_distance"))
425  # sometimes closePairContainer returns particles > distanceCutoff, even
426  # with slack = 0
427  useHardCutoff = self.getParam("use_hard_distance_cutoff")
428 
429  nbl = IMP.container.ClosePairContainer(cont, closePairDistance, 0.0)
430 
431  self.model.evaluate(False)
432 
433  particlePairs = nbl.get_particle_pairs()
434  finalPairs = []
435  for pair in particlePairs:
436  distance = self.getXyzDistance(pair)
437 
438  if (useHardCutoff == "0"
439  or (useHardCutoff == "1"
440  and distance < closePairDistance)):
441  finalPairs.append([pair[0], pair[1]])
442 
443  return finalPairs
444 
445  # Add restraints for bond lengths, angles, dihedrals, and impropers
446  def addForceFieldRestraints(self):
447 
448  useForcefieldRestraints = self.getParam("use_forcefield_restraints")
449  if (useForcefieldRestraints == "1"):
450 
452  IMP.atom.get_data_path("top_heav.lib"),
453  IMP.atom.get_data_path("par.lib"))
454 
455  # Set up and evaluate the stereochemical part (bonds, angles,
456  # dihedrals, impropers) of the CHARMM forcefield
457  bonds = self.topology.add_bonds(self.protein)
458  angles = ff.create_angles(bonds)
459  dihedrals = ff.create_dihedrals(bonds)
460  impropers = self.topology.add_impropers(self.protein)
461 
462  # tracks which particles are together in forcefield restraints
463  # (useful for filtering excluded volume)
464  self.ffParticleGroups = self.initializeParticleNameDict(
465  IMP.atom.get_leaves(self.protein))
466 
467  self.createForceFieldRestraintsForModel(
468  bonds,
469  "bonds",
470  "Bond Restraints",
473  0,
474  1)))
475  self.createForceFieldRestraintsForModel(
476  angles,
477  "angles",
478  "Angle Restraints",
481  0,
482  1)))
483  self.createForceFieldRestraintsForModel(
484  dihedrals,
485  "dihedrals",
486  "Dihedral Restraints",
488  self.createForceFieldRestraintsForModel(
489  impropers,
490  "impropers",
491  "Improper Restraints",
494  0,
495  1)))
496 
497  else:
498  print("Skipping forcefield restraints")
499 
500  # Create one type of forcefield restraint. The restraint is initially
501  # across the full model and needs to be decomposed so it can be used
502  # in the restraint graph.
503  def createForceFieldRestraintsForModel(
504  self,
505  ffParticles,
506  containerName,
507  restraintName,
508  singletonScore):
509 
510  maxForceFieldScore = float(self.getParam("max_forcefield_score"))
511  cont = IMP.container.ListSingletonContainer(self.model, containerName)
512  cont.add_particles(ffParticles)
513  listBondRestraint = IMP.container.SingletonsRestraint(
514  singletonScore, cont, restraintName)
515 
516  # Restraint decomposition has been a little touchy; a couple
517  # workarounds contained
518  decomposedRestraintTemp = listBondRestraint.create_decomposition()
519  decomposedRestraints = IMP.RestraintSet.get_from(
520  decomposedRestraintTemp)
521 
522  # manually remove the full restraint after getting the decomposition
523  del listBondRestraint
524 
525  count = decomposedRestraints.get_number_of_restraints()
526  for i in range(count):
527 
528  r = decomposedRestraints.get_restraint(i)
529 
530  if (self.allFlexibleAtoms(r) == 1):
531  # add the decomposed restraint to the model
532  self.model.add_restraint(r)
533  r.set_maximum_score(maxForceFieldScore)
534  self.addParticlesToGroups(r)
535 
536  # Add all atoms in r to ffParticleGroups; tracks which atoms are
537  # restrained by forcefield terms so we don't restrain them in other
538  # methods (e.g. excluded volume) -- essentially a manual pair-filter
539  def addParticlesToGroups(self, r):
540  inputParticles = []
541  ps = r.get_input_particles()
542 
543  for p in ps:
544 
545  # skip particles that aren't actually atoms (e.g. bond particles)
546  if (self.isAtomParticle(p) == 0):
547  continue
548  inputParticles.append(p.get_name())
549 
550  for firstName in inputParticles:
551  for secondName in inputParticles:
552  if (firstName == secondName):
553  continue
554  self.ffParticleGroups[firstName][secondName] = 1
555 
556  # Add excluded volume restraints across close pairs. Still a work in
557  # progress as they have been slowing down the code quite a bit and
558  # we'll probably want to turn them on only at a defined time
559  def addExcludedVolumeRestraints(self):
560 
561  restrainedParticles = atomicDominoUtilities.getRestrainedParticles(
562  self.protein, self.model, self.namesToParticles)
563  lsc = IMP.container.ListSingletonContainer(restrainedParticles)
564 
565  evr = IMP.core.ExcludedVolumeRestraint(lsc, 1, 1)
566  self.model.add_restraint(evr)
567 
568  # Add lennard jones restraints between flexible atoms if they haven't been
569  # added already, and then scale radii of flexible atoms by input scaling
570  # factor
571  def addFlexFlexLjRestraints(self, scalingFactor):
572  if (self.initializedFlexFlexLjRestraints == 0):
573  print("adding initial lennard jones restraints between pairs "
574  "of flexible atoms")
575  self.initializedFlexFlexLjRestraints = 1
576  leaves = IMP.atom.get_leaves(self.protein)
577 
578  sf = IMP.atom.ForceSwitch(6.0, 7.0)
580  for i in range(len(leaves)):
581  iLeaf = leaves[i]
582  for j in range(i + 1, len(leaves)):
583  jLeaf = leaves[j]
584  if ((self.quickParticleName(iLeaf) in self.flexibleAtoms)
585  == 1 and
586  (self.quickParticleName(jLeaf) in self.flexibleAtoms)
587  == 1):
588  ljpr = IMP.core.PairRestraint(
589  ps, [iLeaf, jLeaf], "LennardJones %s_%s" %
590  (i, j))
591  self.model.add_restraint(ljpr)
592  print("scaling atomic radii by scaling factor %s" % scalingFactor)
593  # reset radii back to original in preparation for rescaling
594  self.ff.add_radii(self.protein)
595  for pName in self.flexibleAtoms.keys():
596  particle = self.namesToParticles[pName]
597  xyzr = IMP.core.XYZR(particle)
598  radius = xyzr.get_radius()
599  radius *= scalingFactor
600  xyzr.set_radius(radius)
601 
602  # add lennard-jones restraints between all fixed atoms and all flexible
603  # atoms
604  def addFixedFlexibleLjRestraints(self):
605  print("adding initial fixed flexible lj restraints")
606  fixedAtoms = self.getFixedAtoms()
607  sf = IMP.atom.ForceSwitch(6.0, 7.0)
609 
610  for flexPname in self.flexibleAtoms.keys():
611  flexParticle = self.namesToParticles[flexPname]
612  for fixedPname in fixedAtoms.keys():
613  fixedParticle = self.namesToParticles[fixedPname]
614  ljpr = IMP.core.PairRestraint(
615  ps, [fixedParticle, flexParticle], "LennardJones %s_%s" %
616  (flexPname, fixedPname))
617  self.model.add_restraint(ljpr)
618 
619  # actually do the work to create the DOPE restraint
620  def createDopeRestraint(self, pair):
621  q0 = self.quickParticleName(pair[0])
622  q1 = self.quickParticleName(pair[1])
623  # create restraint around this distance, add to model
624  prName = "NonBonded %s_%s" % (q0, q1)
625  pairRestraint = IMP.core.PairRestraint(
626  self.dopePairScore, pair, prName)
627 
628  # print "Added non-bonded restraint between %s and %s" % (q0, q1)
629  self.model.add_restraint(pairRestraint)
630  maxNonBondedScore = float(self.getParam("max_non_bonded_score"))
631  pairRestraint.set_maximum_score(maxNonBondedScore)
632  # initialScore = pairRestraint.evaluate(0)
633 
634  # add non-bonded restraints across close pairs. At least one atom in each
635  # pair should be flexible
636  def addClosePairNonBondedRestraints(self):
637  particlePairs = self.getNonBondedPairs()
638 
639  nonBondedRestraintType = self.getParam("non_bonded_restraint_type")
640  dopeCutoff = float(self.getParam("dope_cutoff"))
641 
642  # k = IMP.core.Harmonic.get_k_from_standard_deviation(1)
643 
644  ps = 0
645  if (nonBondedRestraintType == "dope"):
646  IMP.atom.add_dope_score_data(self.protein)
647  ps = IMP.atom.DopePairScore(dopeCutoff)
648  self.dopePairScore = ps
649  nonBondedCount = 0
650  self.nonBondedPairs = self.initializeParticleNameDict(
651  IMP.atom.get_leaves(self.protein))
652  for pair in particlePairs:
653 
654  if (self.skipNonBondedPair(pair) == 1):
655  continue
656 
657  self.createDopeRestraint(pair)
658 
659  nonBondedCount += 1
660  self.nonBondedPairs[self.quickParticleName(pair[0])][
661  self.quickParticleName(pair[0])] = 1
662  self.nonBondedPairs[self.quickParticleName(pair[1])][
663  self.quickParticleName(pair[1])] = 1
664 
665  print("Added %s non-bonded restraints" % nonBondedCount)
666  self.model.evaluate(False)
667 
668  def addCompleteNonBondedRestraints(self):
669  leaves = IMP.atom.get_leaves(self.protein)
670  counter = 0
671 
672  for i in range(len(leaves)):
673  iLeaf = leaves[i]
674  for j in range(i + 1, len(leaves)):
675  jLeaf = leaves[j]
676 
677  if ((self.quickParticleName(jLeaf)
678  in self.nonBondedPairs[self.quickParticleName(iLeaf)])
679  == 1):
680  continue
681  if (self.skipNonBondedPair([iLeaf, jLeaf]) == 1):
682  continue
683 
684  self.createDopeRestraint([iLeaf, jLeaf])
685  counter += 1
686  # print "added complete restraint between %s and %s" %
687  # (self.quickParticleName(iLeaf),
688  # self.quickParticleName(jLeaf))
689  print("added %s restraints for %s particles" % (counter, len(leaves)))
690  self.model.evaluate(False)
691 
692  # lots of output on all restraints
693  def writeAllRestraints(self):
694  print("write all restraints:")
695  for r in IMP.get_restraints([self.model.get_root_restraint_set()]):
696  print(self.quickRestraintName(r))
697 
698  # Read parameterss with md temperature / step schedule and save them
699  def initializeMdSchedule(self):
700  schedule = self.getParam("md_schedule")
701  stages = schedule.split()
702  totalSteps = 0
703  for stage in stages:
704  print("unpacking stage %s" % stage)
705  [steps, temperature, excludedVolume] = stage.split('_')
706  totalSteps += int(steps)
707  self.totalMdSteps = totalSteps
708  self.mdStages = stages
709 
710  # Creates roothandle for hdf5 objects interacting with md trajectory
711  # Either we create a new trajectory in this file and read it back in the
712  # same run, or we are reading a saved trajectory
713  def createMdRootHandle(self):
714 
715  mdFileName = ""
716  outputDir = self.getParam("output_directory")
717 
718  if (self.getParam("read_assignments_from_file") == "no"):
719  # create new hdf5 file to which md output will be written
720  mdFileName = self.getParam("md_trajectory_output_file")
721  fileName = os.path.join(outputDir, mdFileName)
722  print("creating rmf file with filename %s" % fileName)
723  rh = RMF.create_rmf_file(fileName)
724  # my_kc = rh.add_category("my data")
725  IMP.rmf.add_hierarchy(rh, self.protein)
726 
727  self.rootHandle = rh
728  else:
729  # use existing hdf5 file
730  mdFileName = self.getParam("saved_md_filename")
731 
732  rh = RMF.open_rmf_file(mdFileName)
733  IMP.rmf.set_hierarchies(rh, [self.protein])
734 
735  self.rootHandle = rh
736 
737  def evaluateModel(self):
738  return self.model.evaluate(False)
739 
740  def loadMdHelpers(self):
741 
742  self.createMdRootHandle()
743 
744  self.createMdOptimizer()
745 
746  self.addMdOptimizerStates()
747 
748  self.initializeMdSchedule()
749 
750  self.createdMdOptimizer = 1
751 
752  # Create the optimizer object for running MD
753  def createMdOptimizer(self):
754  print("creating md object")
755  md = IMP.atom.MolecularDynamics(self.model)
756  # hack to get around bug where vx attributes in non-optimized particles
757  # are not being set
758  md.optimize(1)
759  md.set_velocity_cap(100.0)
760  self.mdOptimizer = md
761 
762  def addMdOptimizerStates(self):
763 
764  # VelocityScalingOptimizerState
766  IMP.atom.get_leaves(self.protein),
767  0) # temperature is set later
768  # vsIndex = self.mdOptimizer.add_optimizer_state(vsos)
769 
770  # SaveHierarchyConfigurationOptimizerState
771  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState(
772  [self.protein], self.rootHandle)
773  self.mdOptimizer.add_optimizer_state(hdos)
774 
775  self.hdos = hdos
776  self.vsos = vsos
777 
778  def readTrajectories(self):
779 
780  cgFileName = self.getParam("cg_output_file")
781  bestCgScore = 10000000
782  bestCgScoreFile = ""
783  bestCgRmsd = 10000000
784  bestCgRmsdFile = ""
785 
786  outputDir = self.getParam("output_directory")
787  trajectoryFile = self.getParam("md_trajectory_output_file")
788  fullFile = os.path.join(outputDir, trajectoryFile)
789  rh = RMF.open_rmf_file(fullFile)
790  IMP.rmf.set_hierarchies(rh, [self.protein])
791  framesToRead = atomicDominoUtilities.getMdIntervalFrames(
792  rh, int(self.getParam("cg_interval")), self.protein)
793 
794  if (len(framesToRead) > 0):
795  for cgNumber in framesToRead:
796  # Open next cg trajectory
797  outputDir = self.getParam("output_directory")
798  fullCgFileName = os.path.join(
799  outputDir, "%s%s" %
800  (cgFileName, cgNumber))
801  rh = RMF.open_rmf_file(fullCgFileName)
802  IMP.rmf.set_hierarchies(rh, [self.protein])
803 
804  frameCount = IMP.rmf.get_number_of_frames(rh, self.protein)
805  IMP.rmf.load_frame(rh, frameCount - 1, self.protein)
806  score = self.model.evaluate(False)
807  rmsd = self.calculateNativeRmsd(self.flexibleAtoms)
808  print("cg number %s has score %s rmsd %s"
809  % (cgNumber, score, rmsd))
810  if (score < bestCgScore):
811  bestCgScore = score
812  bestCgScoreFile = fullCgFileName
813  if (rmsd < bestCgRmsd):
814  bestCgRmsd = rmsd
815  bestCgRmsdFile = fullCgFileName
816 
817  # output best score information
818  self.singlePdbResults(
819  bestCgScoreFile,
820  -1,
821  self.getParam("best_cg_score_output_file"))
822  self.singlePdbResults(
823  bestCgRmsdFile,
824  -1,
825  self.getParam("best_cg_rmsd_output_file"))
826  self.singlePdbResults(
827  "%s%s" % (cgFileName, framesToRead[-1]),
828  -1, self.getParam("final_cg_frame_output_file"))
829  finalCgRmsd = self.calculateNativeRmsd(self.flexibleAtoms)
830  self.bestCgScore = bestCgScore
831  self.bestCgRmsd = bestCgRmsd
832  self.finalCgRmsd = finalCgRmsd
833 
834  def singlePdbResults(self, trajectoryFile, frame, outputPdbFile):
835 
836  fullTrajectoryFile = os.path.join(
837  self.getParam("output_directory"),
838  trajectoryFile)
839  fullOutputFile = os.path.join(
840  self.getParam("output_directory"),
841  outputPdbFile)
842  rh = RMF.open_rmf_file(fullTrajectoryFile)
843  IMP.rmf.set_hierarchies(rh, [self.protein])
844  if (frame == -1):
845  frame = IMP.rmf.get_number_of_frames(rh, self.protein) - 1
846  IMP.rmf.load_frame(rh, frame, self.protein)
847  IMP.atom.write_pdb(self.protein, fullOutputFile)
848 
849  def calculateRmsd(self, otherProtein, flexibleAtoms):
850  otherNamesToParticles = atomicDominoUtilities.makeNamesToParticles(
851  otherProtein)
852  otherVector = []
853  modelVector = []
854  for pName in otherNamesToParticles.keys():
855  if ((pName in flexibleAtoms) == 0):
856  continue
857  otherParticle = otherNamesToParticles[pName]
858  modelParticle = self.namesToParticles[pName]
859  otherVector.append(
860  IMP.core.XYZ(otherParticle).get_coordinates())
861  modelVector.append(
862  IMP.core.XYZ(modelParticle).get_coordinates())
863  rmsd = IMP.atom.get_rmsd(otherVector, modelVector)
864  return rmsd
865 
866  def calculateNativeRmsd(self, flexibleAtoms):
867 
868  if (self.wroteNativeProtein == 0):
869  pdbName = self.getParam("native_pdb_input_file")
870  self.nativeModel = IMP.Model()
871  self.nativeProtein = IMP.atom.read_pdb(
872  pdbName,
873  self.nativeModel,
875  self.wroteNativeProtein = 1
876 
877  return self.calculateRmsd(self.nativeProtein, flexibleAtoms)
878 
879  # Run MD according to the temperature schedule read in from parameters
880  def runMolecularDynamics(self):
881  self.initializedFlexFlexLjRestraints = 0
882  if (self.fixedNonFlexibleAtoms == 0):
883  print("error: before running md, must fix non flexible atoms")
884  sys.exit()
885  stepsSoFar = 0
886  for stage in self.mdStages:
887  [steps, temperature, excludedVolume] = stage.split('_')
888  stepsSoFar += int(steps)
889  self.vsos.set_temperature(int(temperature))
890  # add lennard jones restraints between flexible atoms and
891  # fixed atoms (don't add between flex atoms)
892  if excludedVolume == "1":
893  self.addFixedFlexibleLjRestraints()
894  elif (excludedVolume != "0"):
895  # add lennard jones restraints between flexible atoms
896  if (self.getParam("excluded_volume_type") == "lj"):
897  self.addFlexFlexLjRestraints(float(excludedVolume))
898  elif (self.getParam("excluded_volume_type") == "ev"):
899  # add full excluded volume restraints (just take
900  # everything in the protein)
901  self.addExcludedVolumeRestraints()
902  else:
903  "please set excluded_volume_type to either lj or ev"
904  sys.exit()
905  print("running md at temperature %s for %s steps"
906  % (temperature, steps))
907  self.mdOptimizer.optimize(int(steps))
908 
910  self.protein,
911  os.path.join(
912  self.getParam(
913  "output_directory"),
914  "md_after_%s.pdb" %
915  stepsSoFar))
916  print("model score after stage %s is %s"
917  % (stage, self.model.evaluate(False)))
918  print("done running md")
919 
920  def runAllCg(self):
921  cgInterval = int(self.getParam("cg_interval"))
922  outputDir = self.getParam("output_directory")
923  trajectoryFile = self.getParam("md_trajectory_output_file")
924  fullFile = os.path.join(outputDir, trajectoryFile)
925  print("open rmf %s" % fullFile)
926  rh = RMF.open_rmf_file(fullFile)
927  IMP.rmf.set_hierarchies(rh, [self.protein])
928  framesToRead = atomicDominoUtilities.getMdIntervalFrames(
929  rh, cgInterval, self.protein)
930 
931  for i in framesToRead:
932  cgFileName = "%s%s" % (self.getParam("cg_output_file"), i)
933  self.applyCg(i, cgFileName)
934 
935  # Apply CG to each step of the saved MD trajectory. Writes results to same
936  # file storing the MD trajectory
937  def applyCg(self, mdStepStart, cgFileName):
938 
939  cgSteps = int(self.getParam("cg_steps"))
940 
941  # load md step specified by calling object
942  print("apply cg: loading md frame for cg step start %s" % mdStepStart)
943  IMP.rmf.load_frame(self.rootHandle, mdStepStart, self.protein)
944 
945  # create new hdf5 file to which cg output will be written
946  outputDir = self.getParam("output_directory")
947  fileName = os.path.join(outputDir, cgFileName)
948  print("creating cg hdf5 file %s" % fileName)
949  rh = RMF.create_rmf_file(fileName)
950  # my_kc = rh.add_category("my data")
951  IMP.rmf.add_hierarchy(rh, self.protein)
952 
953  # apply cg
954  cg = IMP.core.ConjugateGradients(self.model)
955  firstScore = self.model.evaluate(False)
956 
957  print("creating optimizer state")
958  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState(
959  [self.protein], rh)
960  hdos.set_skip_steps(0)
961  # hdos is the optimizer state writing configurations to disk
962  cg.add_optimizer_state(hdos)
963 
964  print("running cg")
965  cg.optimize(cgSteps)
966  secondScore = self.model.evaluate(False)
967  print("cg score after md step %s before %s after %s"
968  % (mdStepStart, firstScore, secondScore))
969  return secondScore
970 
971  # Tell the optimizer not to move atoms we have determined ar fixed
972  def fixNonFlexibleAtoms(self):
973  if (self.createdMdOptimizer == 0):
974  self.createMdOptimizer()
975  # print "ERROR: must create md optimizer and optimize once before
976  # set_particles_are_optimized is called"
977  # sys.exit()
978 
979  self.fixedNonFlexibleAtoms = 1
980  leaves = IMP.atom.get_leaves(self.protein)
981  for leaf in leaves:
982  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 0):
983  xyzDecorator = IMP.core.XYZ(leaf)
984  xyzDecorator.set_coordinates_are_optimized(0)
985 
986  def writeOutput(self):
987  leaves = IMP.atom.get_leaves(self.protein)
988  flexiblePeptideAtoms = 0
989  flexibleProteinAtoms = 0
990  fixedPeptideAtoms = 0
991  fixedProteinAtoms = 0
992 
993  for leaf in leaves:
994  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 0):
995  if (self.getChain(leaf) == self.getParam("peptide_chain")):
996  fixedPeptideAtoms += 1
997  else:
998  fixedProteinAtoms += 1
999  else:
1000  if (self.getChain(leaf) == self.getParam("peptide_chain")):
1001  flexiblePeptideAtoms += 1
1002  else:
1003  flexibleProteinAtoms += 1
1004  print("Flexible peptide atoms: %s" % flexiblePeptideAtoms)
1005  print("Fixed peptide atoms: %s" % fixedPeptideAtoms)
1006  print("Flexible protein atoms: %s" % flexibleProteinAtoms)
1007  print("Fixed protein atoms: %s" % fixedProteinAtoms)
1008 
1009  def writeOsOutput(self):
1010 
1011  print("Best cg score: %s" % self.bestCgScore)
1012  print("best cg rmsd: %s" % self.bestCgRmsd)
1013  print("final cg rmsd: %s" % self.finalCgRmsd)
Applies a SingletonScore to each Singleton in a list.
Chain get_chain(Hierarchy h)
Get the containing chain or Chain() if there is none.
Score the angle based on a UnaryFunction,.
Simple conjugate gradients optimizer.
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
Return all close unordered pairs of particles taken from the SingletonContainer.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
void read_pdb(TextInput input, int model, Hierarchy h)
Vector3D get_random_vector_in(const Cylinder3D &c)
Generate a random vector in a cylinder with uniform density.
CHARMM force field parameters.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Maintains temperature during molecular dynamics by velocity scaling.
Select all non-alternative ATOM records.
Definition: pdb.h:128
double get_rmsd(const Selection &s0, const Selection &s1)
void add_hierarchy(RMF::FileHandle fh, atom::Hierarchy hs)
Lennard-Jones score between a pair of particles.
Simple molecular dynamics simulator.
Store a list of ParticleIndexes.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:238
algebra::BoundingBoxD< 3 > get_bounding_box(const XYZRs &ps)
Get the bounding box.
Score the improper dihedral based on a UnaryFunction,.
RestraintsTemp get_restraints(It b, It e)
Definition: RestraintSet.h:132
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Score the bond based on a UnaryFunction,.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Prevent a set of particles and rigid bodies from inter-penetrating.
void add_dope_score_data(atom::Hierarchy h)
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
Definition: VectorD.h:213
Applies a PairScore to a Pair.
Definition: PairRestraint.h:31
std::string get_data_path(std::string file_name)
Return the full path to one of this module's data files.
Score pair of atoms based on DOPE.
Definition: DopePairScore.h:36
Smooth interaction scores by switching the derivatives (force switch).
Hierarchies get_leaves(const Selection &h)
Support for the RMF file format for storing hierarchical molecular data and markup.
Divide-and-conquer inferential optimization in discrete space.
Harmonic function (symmetric about the mean)
Definition: core/Harmonic.h:27
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27