IMP logo
IMP Reference Guide  2.11.1
The Integrative Modeling Platform
peptideDocker.py
1 from __future__ import print_function
2 import IMP
3 import subprocess
4 import random
5 import IMP.domino
6 import IMP.core
7 import IMP.rmf
8 import RMF
9 
10 import time
11 import IMP.algebra
12 import types
13 import re
14 import sys
15 import operator
16 import os
17 from . import atomicDominoUtilities
18 
19 
20 class PeptideDocker:
21 
22  def __init__(self, parameterFileName):
23  self.createdMdOptimizer = 0
24  self.fixedNonFlexibleAtoms = 0
25  self.readParameters(parameterFileName)
26  self.wroteNativeProtein = 0
27 
28  def loadHelpers(self):
29  self.loadDockingHelpers()
30  self.loadMdHelpers()
31 
32  def readParameters(self, parameterFileName):
33  self.parameters = atomicDominoUtilities.readParameters(
34  parameterFileName)
35 
36  # Processing specific to peptide docking and this script
37  def loadDockingHelpers(self):
38  outputDir = self.getParam("output_directory")
39  mkdirProcess = subprocess.Popen(
40  ['mkdir',
41  '-p',
42  self.getParam('output_directory')],
43  shell=False,
44  stderr=subprocess.PIPE)
45  output = mkdirProcess.communicate()
46 
47  self.namesToParticles = atomicDominoUtilities.makeNamesToParticles(
48  self.protein)
49 
50  self.times = []
51  self.rawTimes = []
52 
53  # Helper method; create dictionary where the keys are names of all
54  # particles in the system and the values are themselves dictionaries
55  def initializeParticleNameDict(self, leaves):
56  particleDict = {}
57 
58  for leaf in leaves:
59  name = self.quickParticleName(leaf)
60  particleDict[name] = {}
61  return particleDict
62 
63  def getModel(self):
64  return self.model
65 
66  def getProtein(self):
67  return self.protein
68 
69  def createModel(self):
70 
71  pdbName = self.getParam("native_pdb_input_file")
72 
73  self.model = IMP.Model()
74 
75  self.protein = IMP.atom.read_pdb(
76  pdbName,
77  self.model,
79 
80  # atom radii, forcefield topology, etc. -- no restraints created here
81 
83  IMP.atom.get_data_path("par.lib"))
84 
85  topology = ff.create_topology(self.protein)
86  topology.apply_default_patches()
87 
88  topology.add_atom_types(self.protein)
89 
90  ff.add_radii(self.protein)
91  ff.add_well_depths(self.protein)
92  self.ff = ff
93  self.topology = topology
94 
95  # Get parameter value. Throws dictionary exception if not found
96  def getParam(self, paramName):
97  paramValue = self.parameters[paramName]
98  return paramValue
99 
100  # Peptide-docking friendly way to log which restraint we're dealing with
101  def quickRestraintName(self, r):
102  nameList = []
103  ps = r.get_input_particles()
104 
105  for p in ps:
106  if (self.isAtomParticle(p) == 0):
107  continue
108  isFlexible = 0
109  if ((self.quickParticleName(p) in self.flexibleAtoms) == 1):
110  isFlexible = 1
111  nameList.append("%s_%s" % (self.quickParticleName(p), isFlexible))
112 
113  return "%s acting on %s" % (r.get_name(), " , ".join(nameList))
114 
115  def quickParticleName(self, particle):
116  return atomicDominoUtilities.quickParticleName(particle)
117 
118  # output current time
119  def logTime(self, label):
120  nextTime = time.asctime(time.localtime(time.time()))
121  print("%s:\t %s" % (label, nextTime))
122  nextTimeEntry = [label, nextTime]
123  self.times.append(nextTimeEntry)
124  self.rawTimes.append(time.time())
125 
126  # output all times that were saved by logTime()
127  def outputTimes(self):
128  print("Time for each step:")
129  for timeEntry in self.times:
130  label = timeEntry[0]
131  value = timeEntry[1]
132  print("%s:\t%s" % (label, value))
133 
134  def getFlexibleAtoms(self):
135  return self.flexibleAtoms
136 
137  # Helper method to quickly get the chain identifier for a particle
138  def getChain(self, particle):
139  atomDecorator = IMP.atom.Atom(particle)
140  residue = IMP.atom.get_residue(atomDecorator)
141  chain = IMP.atom.get_chain(residue)
142  chainId = chain.get_id()
143  return chainId
144 
145  # Helper method to quickly get the residue index for a particle
146  def getResidue(self, particle):
147  atomDecorator = IMP.atom.Atom(particle)
148  residue = IMP.atom.get_residue(atomDecorator)
149  residueNumber = residue.get_index()
150  return residueNumber
151 
152  def isAtomParticle(self, p):
153  return atomicDominoUtilities.isAtomParticle(p)
154 
155  # Read all particles in restraint; return 1 if all are flexible and 0 if
156  # at least one atom is fixed
157  def allFlexibleAtoms(self, r):
158  ps = r.get_input_particles()
159  for p in ps:
160  if (self.isAtomParticle(p) == 0):
161  continue
162  if ((self.quickParticleName(p) in self.flexibleAtoms) == 0):
163  return 0
164  return 1
165 
166  # Check if we should skip processing of this pair of particles. Don't add restraints between particles that aren't optimized,
167  # and don't add restraints between particles on the same residue (for now
168  # assume stereochemistry is good enough for this)
169  def skipNonBondedPair(self, pair):
170 
171  # skip if same residue
172  if (self.getResidue(pair[0]) == self.getResidue(pair[1])):
173  return 1
174 
175  # skip if neither is flexible
176  if ((self.quickParticleName(pair[0]) in self.flexibleAtoms) == 0 and (self.quickParticleName(pair[1]) in self.flexibleAtoms) == 0):
177  return 1
178  return 0
179 
180  # Set up degrees of freedom for the system. Either we are dealing with a fixed protein or a flexible one.
181  # Determine which atoms are flexible and save them in self.flexibleAtoms
182  def initDof(self):
183 
184  peptideChain = self.getParam("peptide_chain")
185  self.flexibleAtoms = {}
186 
187  if (self.getParam("flexible_protein") == "yes"):
188  # All atoms within range of the peptide are flexible; range defined
189  # by close_pair_distance param
190  particlePairs = self.getClosePairs()
191 
192  for pair in particlePairs:
193  p0 = pair[0]
194  p1 = pair[1]
195  p0Chain = self.getChain(p0)
196  p1Chain = self.getChain(p1)
197  if (p0Chain == peptideChain or p1Chain == peptideChain):
198  self.flexibleAtoms[self.quickParticleName(p0)] = 1
199  self.flexibleAtoms[self.quickParticleName(p1)] = 1
200 
201  else:
202  # only peptide atoms are flexible, determined by the chain id
203  leaves = IMP.atom.get_leaves(self.protein)
204  for leaf in leaves:
205  if (self.getChain(leaf) == peptideChain):
206  self.flexibleAtoms[self.quickParticleName(leaf)] = 1
207  self.fixNonFlexibleAtoms()
208 
209  # return a dictionary where the keys are the names of the fixed atoms in
210  # the system
211  def getFixedAtoms(self):
212  leaves = IMP.atom.get_leaves(self.protein)
213  fixedAtoms = {}
214  for leaf in leaves:
215  leafName = self.quickParticleName(leaf)
216  if (leafName in self.flexibleAtoms) == 0:
217  fixedAtoms[leafName] = 1
218  return fixedAtoms
219 
220  # for each flexible atom, give it the coordinates of another flexible atom (ensures that each atom starts out in the binding site)
221  # Although it does guarantee a very random initial configuration, it is
222  # cheating since we are using knowledge of the native state
223  def initializeRandomExisting(self):
224  leaves = IMP.atom.get_leaves(self.protein)
225  particlesToVary = []
226  xyzs = []
227  for leaf in leaves:
228  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 1):
229  particlesToVary.append(self.quickParticleName(leaf))
230  atomToCoordinates = {}
231  for pName in particlesToVary:
232  randomPname = random.choice(particlesToVary)
233  randomP = self.namesToParticles[randomPname]
234  print("setting coordinates for particle %s to the ones currently set for particle %s" % (pName, randomPname))
235  atomToCoordinates[pName] = IMP.core.XYZ(randomP).get_coordinates()
236 
237  for pName in particlesToVary:
238  particle = self.namesToParticles[pName]
239  xyz = IMP.core.XYZ(particle)
240  xyz.set_coordinates(atomToCoordinates[pName])
241  xyzs.append(xyz)
242  return xyzs
243 
244  # Set initial positions of flexible atoms in the system. We are
245  # experimenting with the best way to do this
246  def setInitialPositions(self):
247  initialPositionMethod = self.getParam("initial_position_method")
248 
249  # set random coordinates for atom by creating bounding box around native coordinates, increasing it
250  # by defined offset plus random number (necessary when running on
251  # cluster)
252  if (initialPositionMethod == "random"):
253 
254  myRandom = random.random()
255  initialOffset = float(
256  self.getParam("initial_atom_offset")) + myRandom
257  for particleName in self.flexibleAtoms.keys():
258  p = self.namesToParticles[particleName]
259  xyzDecorator = IMP.core.XYZ(p)
260  coordinates = xyzDecorator.get_coordinates()
261  bb = IMP.algebra.BoundingBox3D(coordinates)
262  bb += initialOffset
263  bb += myRandom
264 
265  randomXyz = IMP.algebra.get_random_vector_in(bb)
266  xyzDecorator.set_coordinates(randomXyz)
267 
268  # for each flexible atom, give it the coordinates of another flexible
269  # atom (ensures that each atom starts out in the binding site)
270  elif (initialPositionMethod == "random_existing"):
271  print("start random existing")
272  xyzs = self.initializeRandomExisting()
273 
274  # same as random_existing except offset each atom a little (doesn't
275  # seem to make too much of a difference)
276  elif (initialPositionMethod == "random_existing_box"):
277  xyzs = self.initializeRandomExisting()
278  initialOffset = float(self.getParam("initial_atom_offset"))
279  for xyzDecorator in xyzs:
280  coordinates = xyzDecorator.get_coordinates()
281  bb = IMP.algebra.BoundingBox3D(coordinates)
282  bb += initialOffset
283 
284  randomXyz = IMP.algebra.get_random_vector_in(bb)
285  xyzDecorator.set_coordinates(randomXyz)
286 
287  elif (initialPositionMethod == "random_box"):
288 
289  # Get bounding box containing all particles within close pair distance of flexible atoms
290  # This doesn't work perfectly; can put initial positions in the fixed atoms accidentally
291  # As constructed it is specific to peptide atoms only and not
292  # flexible protein atoms
293  bsParticles = []
294 
295  for firstPname in self.nonBondedPairs.keys():
296  secondPnames = self.nonBondedPairs[firstPname]
297  for secondPname in secondPnames.keys():
298  if ((secondPname in self.flexibleAtoms) == 0):
299  fixedAtom = self.namesToParticles[firstPname]
300 
301  bsParticles.append(IMP.core.XYZ(fixedAtom))
302  print("added next close atom %s to bounding box particles" % firstPname)
303  # bsBoundingBox = IMP.core.get_bounding_box(bsParticles) -- revert
304  # if I can figure out how
305  peptideParticles = []
306  leaves = IMP.atom.get_leaves(self.protein)
307  for leaf in leaves:
308  if (self.getChain(leaf) == self.getParam("peptide_chain")):
309  peptideParticles.append(IMP.core.XYZ(leaf))
310  bsBoundingBox = IMP.core.get_bounding_box(peptideParticles)
311  # set initial position for each flexible atoms, and adjust it if it
312  # is too close to a fixed residue
313  fixedAtoms = self.getFixedAtoms()
314  for flexPname in self.flexibleAtoms.keys():
315  goodPosition = 0
316  flexParticle = self.namesToParticles[flexPname]
317  flexXyzDecorator = IMP.core.XYZ(flexParticle)
318  print("processing position for pname %s" % flexPname)
319  while(goodPosition == 0):
320  goodPosition = 1
321  flexXyzDecorator.set_coordinates(
322  IMP.algebra.get_random_vector_in(bsBoundingBox))
323  for fixedPname in fixedAtoms.keys():
324  fixedParticle = self.namesToParticles[fixedPname]
325  distance = IMP.algebra.get_distance(
326  IMP.core.XYZ(fixedParticle).get_coordinates(),
327  IMP.core.XYZ(flexParticle).get_coordinates())
328  if (distance < 2):
329  print("finding new position for %s" % flexPname)
330  goodPosition = 0
331  # start the while loop over which will set new
332  # coordinates
333  break
334 
335  # take a box around the whole protein and randomly initialize atoms
336  # here
337  elif (initialPositionMethod == "random_full"):
338  bb = IMP.atom.get_bounding_box(self.protein)
339  for particleName in self.flexibleAtoms.keys():
340  print("creating random position for particle %s" % particleName)
341  p = self.namesToParticles[particleName]
342  xyzDecorator = IMP.core.XYZ(p)
343  randomXyz = IMP.algebra.get_random_vector_in(bb)
344  myRandom = random.random()
345  bb += myRandom
346  xyzDecorator.set_coordinates(randomXyz)
347 
348  # Read in a file generated from a previous run (fastest way is to
349  # create a new model and copy corresponding XYZ Decorators)
350  elif (initialPositionMethod == "file"):
351 
352  initialPositionFile = self.getParam("saved_initial_atom_positions")
353  print("reading initial positions from %s" % initialPositionFile)
354  initialModel = IMP.Model()
355  initialProtein = IMP.atom.read_pdb(
356  initialPositionFile,
357  initialModel,
359  initialLeaves = IMP.atom.get_leaves(initialProtein)
360  for initialLeaf in initialLeaves:
361  name = self.quickParticleName(initialLeaf)
362  if ((name in self.namesToParticles) == 1):
363  existingLeaf = self.namesToParticles[name]
364  existingLeafXyz = IMP.core.XYZ(existingLeaf)
365  initialLeafXyz = IMP.core.XYZ(initialLeaf)
366  existingLeafXyz.set_coordinates(
367  initialLeafXyz.get_coordinates())
368  else:
369  print("Read in initial positions from file %s but this file contained particle %s which is not in current model" % (initialPositionFile, name))
370  else:
371  print("Please specify valid initial position method (random or file)\n")
372  sys.exit()
373 
374  # output initial positions
375  outputDir = self.getParam("output_directory")
376  initialAtomPositionFile = self.getParam(
377  "initial_atom_positions_output_file")
378  fullOutputFile = os.path.join(outputDir, initialAtomPositionFile)
379  print("writing output to file %s" % fullOutputFile)
380  IMP.atom.write_pdb(self.protein, fullOutputFile)
381 
382  # get initial score for model
383  initialScore = self.model.evaluate(False)
384  print("initial score for model is %s" % initialScore)
385 
386  # Get non-bonded pairs in the system in preparation for restraining them
387  def getNonBondedPairs(self):
388  nonBondedDefinition = self.getParam("non_bonded_definition")
389  particlePairs = []
390  if (nonBondedDefinition == "close_pairs"):
391  particlePairs = self.getClosePairs()
392  elif (nonBondedDefinition == "random"):
393  particlePairs = self.getRandomParticlePairs()
394  elif (nonBondedDefinition == "manual"):
395  particlePairs = self.readManualRestraints(nativeProtein, 6)
396  else:
397  print("Please specify valid non bonded pair definition")
398  sys.exit()
399  return particlePairs
400 
401  # Get 3D distance between the particles in the pair
402  def getXyzDistance(self, pair):
403  d0 = IMP.core.XYZ(pair[0])
404  d1 = IMP.core.XYZ(pair[1])
405  distance = IMP.core.get_distance(d0, d1)
406  return distance
407 
408  # Get all pairs of particles within the distance defined by
409  # close_pair_distance parameter
410  def getClosePairs(self):
411 
412  # Get a list of all atoms in the protein, and put it in a container
413  leaves = IMP.atom.get_leaves(self.protein)
415  closePairDistance = float(self.getParam("close_pair_distance"))
416  # sometimes closePairContainer returns particles > distanceCutoff, even
417  # with slack = 0
418  useHardCutoff = self.getParam("use_hard_distance_cutoff")
419 
420  nbl = IMP.container.ClosePairContainer(cont, closePairDistance, 0.0)
421 
422  self.model.evaluate(False)
423 
424  particlePairs = nbl.get_particle_pairs()
425  finalPairs = []
426  for pair in particlePairs:
427  distance = self.getXyzDistance(pair)
428  q0 = self.quickParticleName(pair[0])
429  q1 = self.quickParticleName(pair[1])
430 
431  if (useHardCutoff == "0" or (useHardCutoff == "1" and distance < closePairDistance)):
432  finalPairs.append([pair[0], pair[1]])
433 
434  return finalPairs
435 
436  # Add restraints for bond lengths, angles, dihedrals, and impropers
437  def addForceFieldRestraints(self):
438 
439  useForcefieldRestraints = self.getParam("use_forcefield_restraints")
440  if (useForcefieldRestraints == "1"):
441 
443  IMP.atom.get_data_path("top_heav.lib"),
444  IMP.atom.get_data_path("par.lib"))
445 
446  # Set up and evaluate the stereochemical part (bonds, angles, dihedrals,
447  # impropers) of the CHARMM forcefield
448  bonds = self.topology.add_bonds(self.protein)
449  angles = ff.create_angles(bonds)
450  dihedrals = ff.create_dihedrals(bonds)
451  impropers = self.topology.add_impropers(self.protein)
452 
453  # tracks which particles are together in forcefield restraints
454  # (useful for filtering excluded volume)
455  self.ffParticleGroups = self.initializeParticleNameDict(
456  IMP.atom.get_leaves(self.protein))
457 
458  self.createForceFieldRestraintsForModel(
459  bonds,
460  "bonds",
461  "Bond Restraints",
464  0,
465  1)))
466  self.createForceFieldRestraintsForModel(
467  angles,
468  "angles",
469  "Angle Restraints",
472  0,
473  1)))
474  self.createForceFieldRestraintsForModel(
475  dihedrals,
476  "dihedrals",
477  "Dihedral Restraints",
479  self.createForceFieldRestraintsForModel(
480  impropers,
481  "impropers",
482  "Improper Restraints",
485  0,
486  1)))
487 
488  else:
489  print ("Skipping forcefield restraints")
490 
491  # Create one type of forcefield restraint. The restraint is initially across the full model and needs to be decomposed
492  # so it can be used in the restraint graph.
493  def createForceFieldRestraintsForModel(
494  self,
495  ffParticles,
496  containerName,
497  restraintName,
498  singletonScore):
499 
500  maxForceFieldScore = float(self.getParam("max_forcefield_score"))
501  cont = IMP.container.ListSingletonContainer(self.model, containerName)
502  cont.add_particles(ffParticles)
503  listBondRestraint = IMP.container.SingletonsRestraint(
504  singletonScore, cont, restraintName)
505 
506  # Restraint decomposition has been a little touchy; a couple
507  # workarounds contained
508  decomposedRestraintTemp = listBondRestraint.create_decomposition()
509  decomposedRestraints = IMP.RestraintSet.get_from(
510  decomposedRestraintTemp)
511  rs_restraints = decomposedRestraints.get_restraints()
512 
513  # manually remove the full restraint after getting the decomposition
514  del listBondRestraint
515 
516  count = decomposedRestraints.get_number_of_restraints()
517  for i in range(count):
518 
519  r = decomposedRestraints.get_restraint(i)
520 
521  if (self.allFlexibleAtoms(r) == 1):
522  # add the decomposed restraint to the model
523  self.model.add_restraint(r)
524  r.set_maximum_score(maxForceFieldScore)
525  self.addParticlesToGroups(r)
526  else:
527  t = 1
528  # self.model.remove_restraint(r)
529 
530  # Add all atoms in r to ffParticleGroups; tracks which atoms are restrained by forcefield terms so
531  # we don't restrain them in other methods (e.g. excluded volume) --
532  # essentially a manual pair-filter
533  def addParticlesToGroups(self, r):
534  inputParticles = []
535  ps = r.get_input_particles()
536 
537  for p in ps:
538 
539  if (self.isAtomParticle(p) == 0): # skip particles that aren't actually atoms (e.g. bond particles)
540  continue
541  inputParticles.append(p.get_name())
542 
543  for firstName in inputParticles:
544  for secondName in inputParticles:
545  if (firstName == secondName):
546  continue
547  self.ffParticleGroups[firstName][secondName] = 1
548 
549  # Add excluded volume restraints across close pairs. Still a work in progress as they have been slowing down
550  # the code quite a bit and we'll probably want to turn them on only at a
551  # defined time
552  def addExcludedVolumeRestraints(self):
553 
554  restrainedParticles = atomicDominoUtilities.getRestrainedParticles(
555  self.protein, self.model, self.namesToParticles)
556  lsc = IMP.container.ListSingletonContainer(restrainedParticles)
557 
558  evr = IMP.core.ExcludedVolumeRestraint(lsc, 1, 1)
559  self.model.add_restraint(evr)
560 
561  # Add lennard jones restraints between flexible atoms if they haven't been
562  # added already, and then scale radii of flexible atoms by input scaling
563  # factor
564  def addFlexFlexLjRestraints(self, scalingFactor):
565  if (self.initializedFlexFlexLjRestraints == 0):
566  print("adding initial lennard jones restraints between pairs of flexible atoms")
567  self.initializedFlexFlexLjRestraints = 1
568  leaves = IMP.atom.get_leaves(self.protein)
569  counter = 0
570 
571  sf = IMP.atom.ForceSwitch(6.0, 7.0)
573  for i in range(len(leaves)):
574  iLeaf = leaves[i]
575  for j in range(i + 1, len(leaves)):
576  jLeaf = leaves[j]
577  if ((self.quickParticleName(iLeaf) in self.flexibleAtoms) == 1 and (self.quickParticleName(jLeaf) in self.flexibleAtoms) == 1):
578  ljpr = IMP.core.PairRestraint(
579  ps, [iLeaf, jLeaf], "LennardJones %s_%s" %
580  (i, j))
581  self.model.add_restraint(ljpr)
582  chainHierarchies = IMP.atom.get_by_type(
583  self.protein, IMP.atom.CHAIN_TYPE)
584  peptideChainHierarchy = None
585  print("scaling atomic radii by scaling factor %s" % scalingFactor)
586  # reset radii back to original in preparation for rescaling
587  self.ff.add_radii(self.protein)
588  for pName in self.flexibleAtoms.keys():
589  particle = self.namesToParticles[pName]
590  xyzr = IMP.core.XYZR(particle)
591  radius = xyzr.get_radius()
592  radius *= scalingFactor
593  xyzr.set_radius(radius)
594 
595  # add lennard-jones restraints between all fixed atoms and all flexible
596  # atoms
597  def addFixedFlexibleLjRestraints(self):
598  print("adding initial fixed flexible lj restraints")
599  fixedAtoms = self.getFixedAtoms()
600  sf = IMP.atom.ForceSwitch(6.0, 7.0)
602 
603  for flexPname in self.flexibleAtoms.keys():
604  flexParticle = self.namesToParticles[flexPname]
605  for fixedPname in fixedAtoms.keys():
606  fixedParticle = self.namesToParticles[fixedPname]
607  ljpr = IMP.core.PairRestraint(
608  ps, [fixedParticle, flexParticle], "LennardJones %s_%s" %
609  (flexPname, fixedPname))
610  self.model.add_restraint(ljpr)
611 
612  # actually do the work to create the DOPE restraint
613  def createDopeRestraint(self, pair):
614  q0 = self.quickParticleName(pair[0])
615  q1 = self.quickParticleName(pair[1])
616  # create restraint around this distance, add to model
617  prName = "NonBonded %s_%s" % (q0, q1)
618  pairRestraint = IMP.core.PairRestraint(
619  self.dopePairScore, pair, prName)
620 
621  # print "Added non-bonded restraint between %s and %s" % (q0, q1)
622  self.model.add_restraint(pairRestraint)
623  maxNonBondedScore = float(self.getParam("max_non_bonded_score"))
624  pairRestraint.set_maximum_score(maxNonBondedScore)
625  #initialScore = pairRestraint.evaluate(0)
626 
627  # add non-bonded restraints across close pairs. At least one atom in each
628  # pair should be flexible
629  def addClosePairNonBondedRestraints(self):
630  particlePairs = self.getNonBondedPairs()
631 
632  nonBondedRestraintType = self.getParam("non_bonded_restraint_type")
633  dopeCutoff = float(self.getParam("dope_cutoff"))
634 
636 
637  ps = 0
638  if (nonBondedRestraintType == "dope"):
639  IMP.atom.add_dope_score_data(self.protein)
640  ps = IMP.atom.DopePairScore(dopeCutoff)
641  self.dopePairScore = ps
642  nonBondedCount = 0
643  self.nonBondedPairs = self.initializeParticleNameDict(
644  IMP.atom.get_leaves(self.protein))
645  for pair in particlePairs:
646 
647  if (self.skipNonBondedPair(pair) == 1):
648  continue
649 
650  self.createDopeRestraint(pair)
651 
652  nonBondedCount += 1
653  self.nonBondedPairs[self.quickParticleName(pair[0])][
654  self.quickParticleName(pair[0])] = 1
655  self.nonBondedPairs[self.quickParticleName(pair[1])][
656  self.quickParticleName(pair[1])] = 1
657 
658  print("Added %s non-bonded restraints" % nonBondedCount)
659  self.model.evaluate(False)
660 
661  def addCompleteNonBondedRestraints(self):
662  leaves = IMP.atom.get_leaves(self.protein)
663  counter = 0
664 
665  for i in range(len(leaves)):
666  iLeaf = leaves[i]
667  for j in range(i + 1, len(leaves)):
668  jLeaf = leaves[j]
669 
670  if ((self.quickParticleName(jLeaf) in self.nonBondedPairs[self.quickParticleName(iLeaf)]) == 1):
671  continue
672  if (self.skipNonBondedPair([iLeaf, jLeaf]) == 1):
673  continue
674 
675  self.createDopeRestraint([iLeaf, jLeaf])
676  counter += 1
677  # print "added complete restraint between %s and %s" %
678  # (self.quickParticleName(iLeaf),
679  # self.quickParticleName(jLeaf))
680  print("added %s restraints for %s particles" % (counter, len(leaves)))
681  self.model.evaluate(False)
682 
683  # lots of output on all restraints
684  def writeAllRestraints(self):
685  print("write all restraints:")
686  for r in IMP.get_restraints([self.model.get_root_restraint_set()]):
687  print(self.quickRestraintName(r))
688 
689  # Read parameterss with md temperature / step schedule and save them
690  def initializeMdSchedule(self):
691  schedule = self.getParam("md_schedule")
692  stages = schedule.split()
693  totalSteps = 0
694  for stage in stages:
695  print("unpacking stage %s" % stage)
696  [steps, temperature, excludedVolume] = stage.split('_')
697  totalSteps += int(steps)
698  self.totalMdSteps = totalSteps
699  self.mdStages = stages
700 
701  # Creates roothandle for hdf5 objects interacting with md trajectory
702  # Either we create a new trajectory in this file and read it back in the
703  # same run, or we are reading a saved trajectory
704  def createMdRootHandle(self):
705 
706  mdFileName = ""
707  outputDir = self.getParam("output_directory")
708 
709  if (self.getParam("read_assignments_from_file") == "no"):
710  # create new hdf5 file to which md output will be written
711  mdFileName = self.getParam("md_trajectory_output_file")
712  fileName = os.path.join(outputDir, mdFileName)
713  print("creating rmf file with filename %s" % fileName)
714  rh = RMF.create_rmf_file(fileName)
715  my_kc = rh.add_category("my data")
716  IMP.rmf.add_hierarchy(rh, self.protein)
717 
718  self.rootHandle = rh
719  else:
720  # use existing hdf5 file
721  mdFileName = self.getParam("saved_md_filename")
722 
723  rh = RMF.open_rmf_file(mdFileName)
724  IMP.rmf.set_hierarchies(rh, [self.protein])
725 
726  self.rootHandle = rh
727 
728  def evaluateModel(self):
729  return self.model.evaluate(False)
730 
731  def loadMdHelpers(self):
732 
733  self.createMdRootHandle()
734 
735  self.createMdOptimizer()
736 
737  self.addMdOptimizerStates()
738 
739  self.initializeMdSchedule()
740 
741  self.createdMdOptimizer = 1
742 
743  # Create the optimizer object for running MD
744  def createMdOptimizer(self):
745  print("creating md object")
746  md = IMP.atom.MolecularDynamics(self.model)
747  # hack to get around bug where vx attributes in non-optimized particles
748  # are not being set
749  md.optimize(1)
750  md.set_velocity_cap(100.0)
751  self.mdOptimizer = md
752 
753  def addMdOptimizerStates(self):
754 
755  # VelocityScalingOptimizerState
757  IMP.atom.get_leaves(self.protein),
758  0) # temperature is set later
759  vsIndex = self.mdOptimizer.add_optimizer_state(vsos)
760 
761  # SaveHierarchyConfigurationOptimizerState
762  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState(
763  [self.protein], self.rootHandle)
764  self.mdOptimizer.add_optimizer_state(hdos)
765 
766  self.hdos = hdos
767  self.vsos = vsos
768 
769  def readTrajectories(self):
770 
771  cgFileName = self.getParam("cg_output_file")
772  bestCgScore = 10000000
773  bestCgScoreFile = ""
774  bestCgRmsd = 10000000
775  bestCgRmsdFile = ""
776 
777  outputDir = self.getParam("output_directory")
778  trajectoryFile = self.getParam("md_trajectory_output_file")
779  fullFile = os.path.join(outputDir, trajectoryFile)
780  rh = RMF.open_rmf_file(fullFile)
781  IMP.rmf.set_hierarchies(rh, [self.protein])
782  framesToRead = atomicDominoUtilities.getMdIntervalFrames(
783  rh, int(self.getParam("cg_interval")), self.protein)
784 
785  if (len(framesToRead) > 0):
786  for cgNumber in framesToRead:
787  # Open next cg trajectory
788  outputDir = self.getParam("output_directory")
789  fullCgFileName = os.path.join(
790  outputDir, "%s%s" %
791  (cgFileName, cgNumber))
792  rh = RMF.open_rmf_file(fullCgFileName)
793  IMP.rmf.set_hierarchies(rh, [self.protein])
794 
795  frameCount = IMP.rmf.get_number_of_frames(rh, self.protein)
796  IMP.rmf.load_frame(rh, frameCount - 1, self.protein)
797  score = self.model.evaluate(False)
798  rmsd = self.calculateNativeRmsd(self.flexibleAtoms)
799  print("cg number %s has score %s rmsd %s" % (cgNumber, score, rmsd))
800  if (score < bestCgScore):
801  bestCgScore = score
802  bestCgScoreFile = fullCgFileName
803  if (rmsd < bestCgRmsd):
804  bestCgRmsd = rmsd
805  bestCgRmsdFile = fullCgFileName
806 
807  # output best score information
808  self.singlePdbResults(
809  bestCgScoreFile,
810  -1,
811  self.getParam("best_cg_score_output_file"))
812  self.singlePdbResults(
813  bestCgRmsdFile,
814  -1,
815  self.getParam("best_cg_rmsd_output_file"))
816  self.singlePdbResults(
817  "%s%s" %
818  (cgFileName, framesToRead[-1]), -1, self.getParam("final_cg_frame_output_file"))
819  finalCgRmsd = self.calculateNativeRmsd(self.flexibleAtoms)
820  self.bestCgScore = bestCgScore
821  self.bestCgRmsd = bestCgRmsd
822  self.finalCgRmsd = finalCgRmsd
823 
824  def singlePdbResults(self, trajectoryFile, frame, outputPdbFile):
825 
826  fullTrajectoryFile = os.path.join(
827  self.getParam("output_directory"),
828  trajectoryFile)
829  fullOutputFile = os.path.join(
830  self.getParam("output_directory"),
831  outputPdbFile)
832  rh = RMF.open_rmf_file(fullTrajectoryFile)
833  IMP.rmf.set_hierarchies(rh, [self.protein])
834  if (frame == -1):
835  frame = IMP.rmf.get_number_of_frames(rh, self.protein) - 1
836  IMP.rmf.load_frame(rh, frame, self.protein)
837  IMP.atom.write_pdb(self.protein, fullOutputFile)
838 
839  def calculateRmsd(self, otherProtein, flexibleAtoms):
840  otherNamesToParticles = atomicDominoUtilities.makeNamesToParticles(
841  otherProtein)
842  otherVector = []
843  modelVector = []
844  for pName in otherNamesToParticles.keys():
845  if ((pName in flexibleAtoms) == 0):
846  continue
847  otherParticle = otherNamesToParticles[pName]
848  modelParticle = self.namesToParticles[pName]
849  otherVector.append(
850  IMP.core.XYZ(otherParticle).get_coordinates())
851  modelVector.append(
852  IMP.core.XYZ(modelParticle).get_coordinates())
853  rmsd = IMP.atom.get_rmsd(otherVector, modelVector)
854  return rmsd
855 
856  def calculateNativeRmsd(self, flexibleAtoms):
857 
858  if (self.wroteNativeProtein == 0):
859  pdbName = self.getParam("native_pdb_input_file")
860  self.nativeModel = IMP.Model()
861  self.nativeProtein = IMP.atom.read_pdb(
862  pdbName,
863  self.nativeModel,
865  self.wroteNativeProtein = 1
866 
867  return self.calculateRmsd(self.nativeProtein, flexibleAtoms)
868 
869  # Run MD according to the temperature schedule read in from parameters
870  def runMolecularDynamics(self):
871  self.initializedFlexFlexLjRestraints = 0
872  if (self.fixedNonFlexibleAtoms == 0):
873  print("error: before running md, must fix non flexible atoms")
874  sys.exit()
875  stepsSoFar = 0
876  for stage in self.mdStages:
877  [steps, temperature, excludedVolume] = stage.split('_')
878  stepsSoFar += int(steps)
879  self.vsos.set_temperature(int(temperature))
880  if (excludedVolume == "1"): # add lennard jones restraints between flexible atoms and fixed atoms (don't add between flex atoms)
881  self.addFixedFlexibleLjRestraints()
882  elif (excludedVolume != "0"):
883  # add lennard jones restraints between flexible atoms
884  if (self.getParam("excluded_volume_type") == "lj"):
885  self.addFlexFlexLjRestraints(float(excludedVolume))
886  elif (self.getParam("excluded_volume_type") == "ev"):
887  self.addExcludedVolumeRestraints(
888  ) # add full excluded volume restraints (just take everything in the protein)
889  else:
890  "please set excluded_volume_type to either lj or ev"
891  sys.exit()
892  print("running md at temperature %s for %s steps" % (temperature, steps))
893  self.mdOptimizer.optimize(int(steps))
894 
896  self.protein,
897  os.path.join(
898  self.getParam(
899  "output_directory"),
900  "md_after_%s.pdb" %
901  stepsSoFar))
902  print("model score after stage %s is %s" % (stage, self.model.evaluate(False)))
903  print("done running md")
904 
905  def runAllCg(self):
906  cgInterval = int(self.getParam("cg_interval"))
907  outputDir = self.getParam("output_directory")
908  trajectoryFile = self.getParam("md_trajectory_output_file")
909  fullFile = os.path.join(outputDir, trajectoryFile)
910  print("open rmf %s" % fullFile)
911  rh = RMF.open_rmf_file(fullFile)
912  IMP.rmf.set_hierarchies(rh, [self.protein])
913  framesToRead = atomicDominoUtilities.getMdIntervalFrames(
914  rh, cgInterval, self.protein)
915 
916  for i in framesToRead:
917  cgFileName = "%s%s" % (self.getParam("cg_output_file"), i)
918  self.applyCg(i, cgFileName)
919 
920  # Apply CG to each step of the saved MD trajectory. Writes results to same
921  # file storing the MD trajectory
922  def applyCg(self, mdStepStart, cgFileName):
923 
924  cgSteps = int(self.getParam("cg_steps"))
925 
926  # load md step specified by calling object
927  print("apply cg: loading md frame for cg step start %s" % mdStepStart)
928  IMP.rmf.load_frame(self.rootHandle, mdStepStart, self.protein)
929 
930  # create new hdf5 file to which cg output will be written
931  outputDir = self.getParam("output_directory")
932  fileName = os.path.join(outputDir, cgFileName)
933  print("creating cg hdf5 file %s" % fileName)
934  rh = RMF.create_rmf_file(fileName)
935  my_kc = rh.add_category("my data")
936  IMP.rmf.add_hierarchy(rh, self.protein)
937 
938  # apply cg
939  cg = IMP.core.ConjugateGradients(self.model)
940  firstScore = self.model.evaluate(False)
941 
942  print("creating optimizer state")
943  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState(
944  [self.protein], rh)
945  hdos.set_skip_steps(0)
946  # hdos is the optimizer state writing configurations to disk
947  cg.add_optimizer_state(hdos)
948 
949  print("running cg")
950  cg.optimize(cgSteps)
951  secondScore = self.model.evaluate(False)
952  print("cg score after md step %s before %s after %s" % (mdStepStart, firstScore, secondScore))
953  return secondScore
954 
955  # Tell the optimizer not to move atoms we have determined ar fixed
956  def fixNonFlexibleAtoms(self):
957  if (self.createdMdOptimizer == 0):
958  self.createMdOptimizer()
959  # print "ERROR: must create md optimizer and optimize once before set_particles_are_optimized is called"
960  # sys.exit()
961 
962  self.fixedNonFlexibleAtoms = 1
963  leaves = IMP.atom.get_leaves(self.protein)
964  for leaf in leaves:
965  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 0):
966  xyzDecorator = IMP.core.XYZ(leaf)
967  xyzDecorator.set_coordinates_are_optimized(0)
968 
969  def writeOutput(self):
970  leaves = IMP.atom.get_leaves(self.protein)
971  flexiblePeptideAtoms = 0
972  flexibleProteinAtoms = 0
973  fixedPeptideAtoms = 0
974  fixedProteinAtoms = 0
975 
976  for leaf in leaves:
977  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 0):
978  if (self.getChain(leaf) == self.getParam("peptide_chain")):
979  fixedPeptideAtoms += 1
980  else:
981  fixedProteinAtoms += 1
982  else:
983  if (self.getChain(leaf) == self.getParam("peptide_chain")):
984  flexiblePeptideAtoms += 1
985  else:
986  flexibleProteinAtoms += 1
987  print("Flexible peptide atoms: %s" % flexiblePeptideAtoms)
988  print("Fixed peptide atoms: %s" % fixedPeptideAtoms)
989  print("Flexible protein atoms: %s" % flexibleProteinAtoms)
990  print("Fixed protein atoms: %s" % fixedProteinAtoms)
991 
992  def writeOsOutput(self):
993 
994  print("Best cg score: %s" % self.bestCgScore)
995  print("best cg rmsd: %s" % self.bestCgRmsd)
996  print("final cg rmsd: %s" % self.finalCgRmsd)
Applies a SingletonScore to each Singleton in a list.
Chain get_chain(Hierarchy h)
Get the containing chain or Chain() if there is none.
Score the angle based on a UnaryFunction,.
Simple conjugate gradients optimizer.
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
Return all close unordered pairs of particles taken from the SingletonContainer.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
void read_pdb(TextInput input, int model, Hierarchy h)
Vector3D get_random_vector_in(const Cylinder3D &c)
Generate a random vector in a cylinder with uniform density.
CHARMM force field parameters.
static Float get_k_from_standard_deviation(Float sd, Float t=297.15)
Return the k to use for a given Gaussian standard deviation.
Definition: core/Harmonic.h:62
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
Maintains temperature during molecular dynamics by velocity scaling.
Select all non-alternative ATOM records.
Definition: pdb.h:64
double get_rmsd(const Selection &s0, const Selection &s1)
void add_hierarchy(RMF::FileHandle fh, atom::Hierarchy hs)
Lennard-Jones score between a pair of particles.
Simple molecular dynamics optimizer.
Store a list of ParticleIndexes.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
algebra::BoundingBoxD< 3 > get_bounding_box(const XYZRs &ps)
Get the bounding box.
Score the improper dihedral based on a UnaryFunction,.
RestraintsTemp get_restraints(It b, It e)
Definition: RestraintSet.h:113
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Score the bond based on a UnaryFunction,.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Prevent a set of particles and rigid bodies from inter-penetrating.
void add_dope_score_data(atom::Hierarchy h)
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
Definition: VectorD.h:209
Applies a PairScore to a Pair.
Definition: PairRestraint.h:29
std::string get_data_path(std::string file_name)
Return the full path to one of this module's data files.
Smooth interaction scores by switching the derivatives (force switch).
Hierarchies get_leaves(const Selection &h)
Support for the RMF file format for storing hierarchical molecular data and markup.
Divide-and-conquer inferential optimization in discrete space.
Harmonic function (symmetric about the mean)
Definition: core/Harmonic.h:24
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27