IMP  2.4.0
The Integrative Modeling Platform
peptideDocker.py
1 from __future__ import print_function
2 import IMP
3 import subprocess
4 import random
5 import IMP.domino
6 import IMP.core
7 import IMP.rmf
8 import RMF
9 
10 import time
11 import IMP.algebra
12 import types
13 import re
14 import sys
15 import operator
16 import os
17 from . import atomicDominoUtilities
18 
19 
20 class PeptideDocker:
21 
22  def __init__(self, parameterFileName):
23  self.createdMdOptimizer = 0
24  self.fixedNonFlexibleAtoms = 0
25  self.readParameters(parameterFileName)
26  self.wroteNativeProtein = 0
27 
28  def loadHelpers(self):
29  self.loadDockingHelpers()
30  self.loadMdHelpers()
31 
32  def readParameters(self, parameterFileName):
33  self.parameters = atomicDominoUtilities.readParameters(
34  parameterFileName)
35 
36  # Processing specific to peptide docking and this script
37  def loadDockingHelpers(self):
38  outputDir = self.getParam("output_directory")
39  mkdirProcess = subprocess.Popen(
40  ['mkdir',
41  '-p',
42  self.getParam('output_directory')],
43  shell=False,
44  stderr=subprocess.PIPE)
45  output = mkdirProcess.communicate()
46 
47  self.namesToParticles = atomicDominoUtilities.makeNamesToParticles(
48  self.protein)
49 
50  self.times = []
51  self.rawTimes = []
52 
53  # Helper method; create dictionary where the keys are names of all
54  # particles in the system and the values are themselves dictionaries
55  def initializeParticleNameDict(self, leaves):
56  particleDict = {}
57 
58  for leaf in leaves:
59  name = self.quickParticleName(leaf)
60  particleDict[name] = {}
61  return particleDict
62 
63  def getModel(self):
64  return self.model
65 
66  def getProtein(self):
67  return self.protein
68 
69  def createModel(self):
70 
71  pdbName = self.getParam("native_pdb_input_file")
72 
73  self.model = IMP.kernel.Model()
74 
75  self.protein = IMP.atom.read_pdb(
76  pdbName,
77  self.model,
79 
80  # atom radii, forcefield topology, etc. -- no restraints created here
81 
83  IMP.atom.get_data_path("par.lib"))
84 
85  topology = ff.create_topology(self.protein)
86  topology.apply_default_patches()
87 
88  topology.add_atom_types(self.protein)
89 
90  ff.add_radii(self.protein)
91  ff.add_well_depths(self.protein)
92  self.ff = ff
93  self.topology = topology
94 
95  # Get parameter value. Throws dictionary exception if not found
96  def getParam(self, paramName):
97  paramValue = self.parameters[paramName]
98  return paramValue
99 
100  # Peptide-docking friendly way to log which restraint we're dealing with
101  def quickRestraintName(self, r):
102  nameList = []
103  ps = r.get_input_particles()
104 
105  for p in ps:
106  if (self.isAtomParticle(p) == 0):
107  continue
108  isFlexible = 0
109  if ((self.quickParticleName(p) in self.flexibleAtoms) == 1):
110  isFlexible = 1
111  nameList.append("%s_%s" % (self.quickParticleName(p), isFlexible))
112 
113  return "%s acting on %s" % (r.get_name(), " , ".join(nameList))
114 
115  def quickParticleName(self, particle):
116  return atomicDominoUtilities.quickParticleName(particle)
117 
118  # output current time
119  def logTime(self, label):
120  nextTime = time.asctime(time.localtime(time.time()))
121  print("%s:\t %s" % (label, nextTime))
122  nextTimeEntry = [label, nextTime]
123  self.times.append(nextTimeEntry)
124  self.rawTimes.append(time.time())
125 
126  # output all times that were saved by logTime()
127  def outputTimes(self):
128  print("Time for each step:")
129  for timeEntry in self.times:
130  label = timeEntry[0]
131  value = timeEntry[1]
132  print("%s:\t%s" % (label, value))
133 
134  def getFlexibleAtoms(self):
135  return self.flexibleAtoms
136 
137  # Helper method to quickly get the chain identifier for a particle
138  def getChain(self, particle):
139  atomDecorator = IMP.atom.Atom(particle)
140  residue = IMP.atom.get_residue(atomDecorator)
141  chain = IMP.atom.get_chain(residue)
142  chainId = chain.get_id()
143  return chainId
144 
145  # Helper method to quickly get the residue index for a particle
146  def getResidue(self, particle):
147  atomDecorator = IMP.atom.Atom(particle)
148  residue = IMP.atom.get_residue(atomDecorator)
149  residueNumber = residue.get_index()
150  return residueNumber
151 
152  def isAtomParticle(self, p):
153  return atomicDominoUtilities.isAtomParticle(p)
154 
155  # Read all particles in restraint; return 1 if all are flexible and 0 if
156  # at least one atom is fixed
157  def allFlexibleAtoms(self, r):
158  ps = r.get_input_particles()
159  for p in ps:
160  if (self.isAtomParticle(p) == 0):
161  continue
162  if ((self.quickParticleName(p) in self.flexibleAtoms) == 0):
163  return 0
164  return 1
165 
166  # Check if we should skip processing of this pair of particles. Don't add restraints between particles that aren't optimized,
167  # and don't add restraints between particles on the same residue (for now
168  # assume stereochemistry is good enough for this)
169  def skipNonBondedPair(self, pair):
170 
171  # skip if same residue
172  if (self.getResidue(pair[0]) == self.getResidue(pair[1])):
173  return 1
174 
175  # skip if neither is flexible
176  if ((self.quickParticleName(pair[0]) in self.flexibleAtoms) == 0 and (self.quickParticleName(pair[1]) in self.flexibleAtoms) == 0):
177  return 1
178  return 0
179 
180  # Set up degrees of freedom for the system. Either we are dealing with a fixed protein or a flexible one.
181  # Determine which atoms are flexible and save them in self.flexibleAtoms
182  def initDof(self):
183 
184  peptideChain = self.getParam("peptide_chain")
185  self.flexibleAtoms = {}
186 
187  if (self.getParam("flexible_protein") == "yes"):
188  # All atoms within range of the peptide are flexible; range defined
189  # by close_pair_distance param
190  particlePairs = self.getClosePairs()
191 
192  for pair in particlePairs:
193  p0 = pair[0]
194  p1 = pair[1]
195  p0Chain = self.getChain(p0)
196  p1Chain = self.getChain(p1)
197  if (p0Chain == peptideChain or p1Chain == peptideChain):
198  self.flexibleAtoms[self.quickParticleName(p0)] = 1
199  self.flexibleAtoms[self.quickParticleName(p1)] = 1
200 
201  else:
202  # only peptide atoms are flexible, determined by the chain id
203  leaves = IMP.atom.get_leaves(self.protein)
204  for leaf in leaves:
205  if (self.getChain(leaf) == peptideChain):
206  self.flexibleAtoms[self.quickParticleName(leaf)] = 1
207  self.fixNonFlexibleAtoms()
208 
209  # return a dictionary where the keys are the names of the fixed atoms in
210  # the system
211  def getFixedAtoms(self):
212  leaves = IMP.atom.get_leaves(self.protein)
213  fixedAtoms = {}
214  for leaf in leaves:
215  leafName = self.quickParticleName(leaf)
216  if (leafName in self.flexibleAtoms) == 0:
217  fixedAtoms[leafName] = 1
218  return fixedAtoms
219 
220  # for each flexible atom, give it the coordinates of another flexible atom (ensures that each atom starts out in the binding site)
221  # Although it does guarantee a very random initial configuration, it is
222  # cheating since we are using knowledge of the native state
223  def initializeRandomExisting(self):
224  leaves = IMP.atom.get_leaves(self.protein)
225  particlesToVary = []
226  xyzs = []
227  for leaf in leaves:
228  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 1):
229  particlesToVary.append(self.quickParticleName(leaf))
230  atomToCoordinates = {}
231  for pName in particlesToVary:
232  randomPname = random.choice(particlesToVary)
233  randomP = self.namesToParticles[randomPname]
234  print("setting coordinates for particle %s to the ones currently set for particle %s" % (pName, randomPname))
235  atomToCoordinates[pName] = IMP.core.XYZ(randomP).get_coordinates()
236 
237  for pName in particlesToVary:
238  particle = self.namesToParticles[pName]
239  xyz = IMP.core.XYZ(particle)
240  xyz.set_coordinates(atomToCoordinates[pName])
241  xyzs.append(xyz)
242  return xyzs
243 
244  # Set initial positions of flexible atoms in the system. We are
245  # experimenting with the best way to do this
246  def setInitialPositions(self):
247  initialPositionMethod = self.getParam("initial_position_method")
248 
249  # set random coordinates for atom by creating bounding box around native coordinates, increasing it
250  # by defined offset plus random number (necessary when running on
251  # cluster)
252  if (initialPositionMethod == "random"):
253 
254  myRandom = random.random()
255  initialOffset = float(
256  self.getParam("initial_atom_offset")) + myRandom
257  for particleName in self.flexibleAtoms.keys():
258  p = self.namesToParticles[particleName]
259  xyzDecorator = IMP.core.XYZ(p)
260  coordinates = xyzDecorator.get_coordinates()
261  bb = IMP.algebra.BoundingBox3D(coordinates)
262  bb += initialOffset
263  bb += myRandom
264 
265  randomXyz = IMP.algebra.get_random_vector_in(bb)
266  xyzDecorator.set_coordinates(randomXyz)
267 
268  # for each flexible atom, give it the coordinates of another flexible
269  # atom (ensures that each atom starts out in the binding site)
270  elif (initialPositionMethod == "random_existing"):
271  print("start random existing")
272  xyzs = self.initializeRandomExisting()
273 
274  # same as random_existing except offset each atom a little (doesn't
275  # seem to make too much of a difference)
276  elif (initialPositionMethod == "random_existing_box"):
277  xyzs = self.initializeRandomExisting()
278  initialOffset = float(self.getParam("initial_atom_offset"))
279  for xyzDecorator in xyzs:
280  coordinates = xyzDecorator.get_coordinates()
281  bb = IMP.algebra.BoundingBox3D(coordinates)
282  bb += initialOffset
283 
284  randomXyz = IMP.algebra.get_random_vector_in(bb)
285  xyzDecorator.set_coordinates(randomXyz)
286 
287  elif (initialPositionMethod == "random_box"):
288 
289  # Get bounding box containing all particles within close pair distance of flexible atoms
290  # This doesn't work perfectly; can put initial positions in the fixed atoms accidentally
291  # As constructed it is specific to peptide atoms only and not
292  # flexible protein atoms
293  bsParticles = []
294 
295  for firstPname in self.nonBondedPairs.keys():
296  secondPnames = self.nonBondedPairs[firstPname]
297  for secondPname in secondPnames.keys():
298  if ((secondPname in self.flexibleAtoms) == 0):
299  fixedAtom = self.namesToParticles[firstPname]
300 
301  bsParticles.append(IMP.core.XYZ(fixedAtom))
302  print("added next close atom %s to bounding box particles" % firstPname)
303  # bsBoundingBox = IMP.core.get_bounding_box(bsParticles) -- revert
304  # if I can figure out how
305  peptideParticles = []
306  leaves = IMP.atom.get_leaves(self.protein)
307  for leaf in leaves:
308  if (self.getChain(leaf) == self.getParam("peptide_chain")):
309  peptideParticles.append(IMP.core.XYZ(leaf))
310  bsBoundingBox = IMP.core.get_bounding_box(peptideParticles)
311  # set initial position for each flexible atoms, and adjust it if it
312  # is too close to a fixed residue
313  fixedAtoms = self.getFixedAtoms()
314  for flexPname in self.flexibleAtoms.keys():
315  goodPosition = 0
316  flexParticle = self.namesToParticles[flexPname]
317  flexXyzDecorator = IMP.core.XYZ(flexParticle)
318  print("processing position for pname %s" % flexPname)
319  while(goodPosition == 0):
320  goodPosition = 1
321  flexXyzDecorator.set_coordinates(
322  IMP.algebra.get_random_vector_in(bsBoundingBox))
323  for fixedPname in fixedAtoms.keys():
324  fixedParticle = self.namesToParticles[fixedPname]
325  distance = IMP.algebra.get_distance(
326  IMP.core.XYZ(fixedParticle).get_coordinates(),
327  IMP.core.XYZ(flexParticle).get_coordinates())
328  if (distance < 2):
329  print("finding new position for %s" % flexPname)
330  goodPosition = 0
331  # start the while loop over which will set new
332  # coordinates
333  break
334 
335  # take a box around the whole protein and randomly initialize atoms
336  # here
337  elif (initialPositionMethod == "random_full"):
338  bb = IMP.atom.get_bounding_box(self.protein)
339  for particleName in self.flexibleAtoms.keys():
340  print("creating random position for particle %s" % particleName)
341  p = self.namesToParticles[particleName]
342  xyzDecorator = IMP.core.XYZ(p)
343  randomXyz = IMP.algebra.get_random_vector_in(bb)
344  myRandom = random.random()
345  bb += myRandom
346  xyzDecorator.set_coordinates(randomXyz)
347 
348  # Read in a file generated from a previous run (fastest way is to
349  # create a new model and copy corresponding XYZ Decorators)
350  elif (initialPositionMethod == "file"):
351 
352  initialPositionFile = self.getParam("saved_initial_atom_positions")
353  print("reading initial positions from %s" % initialPositionFile)
354  initialModel = IMP.kernel.Model()
355  initialProtein = IMP.atom.read_pdb(
356  initialPositionFile,
357  initialModel,
359  initialLeaves = IMP.atom.get_leaves(initialProtein)
360  for initialLeaf in initialLeaves:
361  name = self.quickParticleName(initialLeaf)
362  if ((name in self.namesToParticles) == 1):
363  existingLeaf = self.namesToParticles[name]
364  existingLeafXyz = IMP.core.XYZ(existingLeaf)
365  initialLeafXyz = IMP.core.XYZ(initialLeaf)
366  existingLeafXyz.set_coordinates(
367  initialLeafXyz.get_coordinates())
368  else:
369  print("Read in initial positions from file %s but this file contained particle %s which is not in current model" % (initialPositionFile, name))
370  else:
371  print("Please specify valid initial position method (random or file)\n")
372  sys.exit()
373 
374  # output initial positions
375  outputDir = self.getParam("output_directory")
376  initialAtomPositionFile = self.getParam(
377  "initial_atom_positions_output_file")
378  fullOutputFile = os.path.join(outputDir, initialAtomPositionFile)
379  print("writing output to file %s" % fullOutputFile)
380  IMP.atom.write_pdb(self.protein, fullOutputFile)
381 
382  # get initial score for model
383  initialScore = self.model.evaluate(False)
384  print("initial score for model is %s" % initialScore)
385 
386  # Get non-bonded pairs in the system in preparation for restraining them
387  def getNonBondedPairs(self):
388  nonBondedDefinition = self.getParam("non_bonded_definition")
389  particlePairs = []
390  if (nonBondedDefinition == "close_pairs"):
391  particlePairs = self.getClosePairs()
392  elif (nonBondedDefinition == "random"):
393  particlePairs = self.getRandomParticlePairs()
394  elif (nonBondedDefinition == "manual"):
395  particlePairs = self.readManualRestraints(nativeProtein, 6)
396  else:
397  print("Please specify valid non bonded pair definition")
398  sys.exit()
399  return particlePairs
400 
401  # Get 3D distance between the particles in the pair
402  def getXyzDistance(self, pair):
403  d0 = IMP.core.XYZ(pair[0])
404  d1 = IMP.core.XYZ(pair[1])
405  distance = IMP.core.get_distance(d0, d1)
406  return distance
407 
408  # Get all pairs of particles within the distance defined by
409  # close_pair_distance parameter
410  def getClosePairs(self):
411 
412  # Get a list of all atoms in the protein, and put it in a container
413  leaves = IMP.atom.get_leaves(self.protein)
415  closePairDistance = float(self.getParam("close_pair_distance"))
416  # sometimes closePairContainer returns particles > distanceCutoff, even
417  # with slack = 0
418  useHardCutoff = self.getParam("use_hard_distance_cutoff")
419 
420  nbl = IMP.container.ClosePairContainer(cont, closePairDistance, 0.0)
421 
422  self.model.evaluate(False)
423 
424  particlePairs = nbl.get_particle_pairs()
425  finalPairs = []
426  for pair in particlePairs:
427  distance = self.getXyzDistance(pair)
428  q0 = self.quickParticleName(pair[0])
429  q1 = self.quickParticleName(pair[1])
430 
431  if (useHardCutoff == "0" or (useHardCutoff == "1" and distance < closePairDistance)):
432  finalPairs.append([pair[0], pair[1]])
433 
434  return finalPairs
435 
436  # Add restraints for bond lengths, angles, dihedrals, and impropers
437  def addForceFieldRestraints(self):
438 
439  useForcefieldRestraints = self.getParam("use_forcefield_restraints")
440  if (useForcefieldRestraints == "1"):
441 
443  IMP.atom.get_data_path("top_heav.lib"),
444  IMP.atom.get_data_path("par.lib"))
445 
446  # Set up and evaluate the stereochemical part (bonds, angles, dihedrals,
447  # impropers) of the CHARMM forcefield
448  bonds = self.topology.add_bonds(self.protein)
449  angles = ff.create_angles(bonds)
450  dihedrals = ff.create_dihedrals(bonds)
451  impropers = self.topology.add_impropers(self.protein)
452 
453  # tracks which particles are together in forcefield restraints
454  # (useful for filtering excluded volume)
455  self.ffParticleGroups = self.initializeParticleNameDict(
456  IMP.atom.get_leaves(self.protein))
457 
458  self.createForceFieldRestraintsForModel(
459  bonds,
460  "bonds",
461  "Bond Restraints",
464  0,
465  1)))
466  self.createForceFieldRestraintsForModel(
467  angles,
468  "angles",
469  "Angle Restraints",
472  0,
473  1)))
474  self.createForceFieldRestraintsForModel(
475  dihedrals,
476  "dihedrals",
477  "Dihedral Restraints",
479  self.createForceFieldRestraintsForModel(
480  impropers,
481  "impropers",
482  "Improper Restraints",
485  0,
486  1)))
487 
488  else:
489  print ("Skipping forcefield restraints")
490 
491  # Create one type of forcefield restraint. The restraint is initially across the full model and needs to be decomposed
492  # so it can be used in the restraint graph.
493  def createForceFieldRestraintsForModel(
494  self,
495  ffParticles,
496  containerName,
497  restraintName,
498  singletonScore):
499 
500  maxForceFieldScore = float(self.getParam("max_forcefield_score"))
501  cont = IMP.container.ListSingletonContainer(self.model, containerName)
502  cont.add_particles(ffParticles)
503  listBondRestraint = IMP.container.SingletonsRestraint(
504  singletonScore, cont, restraintName)
505  self.model.add_restraint(listBondRestraint)
506 
507  # Restraint decomposition has been a little touchy; a couple
508  # workarounds contained
509  decomposedRestraintTemp = listBondRestraint.create_decomposition()
510  decomposedRestraints = IMP.kernel.RestraintSet.get_from(
511  decomposedRestraintTemp)
512  rs_restraints = decomposedRestraints.get_restraints()
513 
514  # manually remove the full restraint after getting the decomposition
515  self.model.remove_restraint(listBondRestraint)
516 
517  count = decomposedRestraints.get_number_of_restraints()
518  for i in range(count):
519 
520  r = decomposedRestraints.get_restraint(i)
521 
522  if (self.allFlexibleAtoms(r) == 1):
523  # add the decomposed restraint to the model
524  self.model.add_restraint(r)
525  self.model.set_maximum_score(r, maxForceFieldScore)
526  self.addParticlesToGroups(r)
527  else:
528  t = 1
529  # self.model.remove_restraint(r)
530 
531  # Add all atoms in r to ffParticleGroups; tracks which atoms are restrained by forcefield terms so
532  # we don't restrain them in other methods (e.g. excluded volume) --
533  # essentially a manual pair-filter
534  def addParticlesToGroups(self, r):
535  inputParticles = []
536  ps = r.get_input_particles()
537 
538  for p in ps:
539 
540  if (self.isAtomParticle(p) == 0): # skip particles that aren't actually atoms (e.g. bond particles)
541  continue
542  inputParticles.append(p.get_name())
543 
544  for firstName in inputParticles:
545  for secondName in inputParticles:
546  if (firstName == secondName):
547  continue
548  self.ffParticleGroups[firstName][secondName] = 1
549 
550  # Add excluded volume restraints across close pairs. Still a work in progress as they have been slowing down
551  # the code quite a bit and we'll probably want to turn them on only at a
552  # defined time
553  def addExcludedVolumeRestraints(self):
554 
555  restrainedParticles = atomicDominoUtilities.getRestrainedParticles(
556  self.protein, self.model, self.namesToParticles)
557  lsc = IMP.container.ListSingletonContainer(restrainedParticles)
558 
559  evr = IMP.core.ExcludedVolumeRestraint(lsc, 1, 1)
560  self.model.add_restraint(evr)
561 
562  # Add lennard jones restraints between flexible atoms if they haven't been
563  # added already, and then scale radii of flexible atoms by input scaling
564  # factor
565  def addFlexFlexLjRestraints(self, scalingFactor):
566  if (self.initializedFlexFlexLjRestraints == 0):
567  print("adding initial lennard jones restraints between pairs of flexible atoms")
568  self.initializedFlexFlexLjRestraints = 1
569  leaves = IMP.atom.get_leaves(self.protein)
570  counter = 0
571 
572  sf = IMP.atom.ForceSwitch(6.0, 7.0)
574  for i in range(len(leaves)):
575  iLeaf = leaves[i]
576  for j in range(i + 1, len(leaves)):
577  jLeaf = leaves[j]
578  if ((self.quickParticleName(iLeaf) in self.flexibleAtoms) == 1 and (self.quickParticleName(jLeaf) in self.flexibleAtoms) == 1):
579  ljpr = IMP.core.PairRestraint(
580  ps, [iLeaf, jLeaf], "LennardJones %s_%s" %
581  (i, j))
582  self.model.add_restraint(ljpr)
583  chainHierarchies = IMP.atom.get_by_type(
584  self.protein, IMP.atom.CHAIN_TYPE)
585  peptideChainHierarchy = None
586  print("scaling atomic radii by scaling factor %s" % scalingFactor)
587  # reset radii back to original in preparation for rescaling
588  self.ff.add_radii(self.protein)
589  for pName in self.flexibleAtoms.keys():
590  particle = self.namesToParticles[pName]
591  xyzr = IMP.core.XYZR(particle)
592  radius = xyzr.get_radius()
593  radius *= scalingFactor
594  xyzr.set_radius(radius)
595 
596  # add lennard-jones restraints between all fixed atoms and all flexible
597  # atoms
598  def addFixedFlexibleLjRestraints(self):
599  print("adding initial fixed flexible lj restraints")
600  fixedAtoms = self.getFixedAtoms()
601  sf = IMP.atom.ForceSwitch(6.0, 7.0)
603 
604  for flexPname in self.flexibleAtoms.keys():
605  flexParticle = self.namesToParticles[flexPname]
606  for fixedPname in fixedAtoms.keys():
607  fixedParticle = self.namesToParticles[fixedPname]
608  ljpr = IMP.core.PairRestraint(
609  ps, [fixedParticle, flexParticle], "LennardJones %s_%s" %
610  (flexPname, fixedPname))
611  self.model.add_restraint(ljpr)
612 
613  # actually do the work to create the DOPE restraint
614  def createDopeRestraint(self, pair):
615  q0 = self.quickParticleName(pair[0])
616  q1 = self.quickParticleName(pair[1])
617  # create restraint around this distance, add to model
618  prName = "NonBonded %s_%s" % (q0, q1)
619  pairRestraint = IMP.core.PairRestraint(
620  self.dopePairScore, pair, prName)
621 
622  # print "Added non-bonded restraint between %s and %s" % (q0, q1)
623  self.model.add_restraint(pairRestraint)
624  maxNonBondedScore = float(self.getParam("max_non_bonded_score"))
625  self.model.set_maximum_score(pairRestraint, maxNonBondedScore)
626  #initialScore = pairRestraint.evaluate(0)
627 
628  # add non-bonded restraints across close pairs. At least one atom in each
629  # pair should be flexible
630  def addClosePairNonBondedRestraints(self):
631  particlePairs = self.getNonBondedPairs()
632 
633  nonBondedRestraintType = self.getParam("non_bonded_restraint_type")
634  dopeCutoff = float(self.getParam("dope_cutoff"))
635 
637 
638  ps = 0
639  if (nonBondedRestraintType == "dope"):
640  IMP.atom.add_dope_score_data(self.protein)
641  ps = IMP.atom.DopePairScore(dopeCutoff)
642  self.dopePairScore = ps
643  nonBondedCount = 0
644  self.nonBondedPairs = self.initializeParticleNameDict(
645  IMP.atom.get_leaves(self.protein))
646  for pair in particlePairs:
647 
648  if (self.skipNonBondedPair(pair) == 1):
649  continue
650 
651  self.createDopeRestraint(pair)
652 
653  nonBondedCount += 1
654  self.nonBondedPairs[self.quickParticleName(pair[0])][
655  self.quickParticleName(pair[0])] = 1
656  self.nonBondedPairs[self.quickParticleName(pair[1])][
657  self.quickParticleName(pair[1])] = 1
658 
659  print("Added %s non-bonded restraints" % nonBondedCount)
660  self.model.evaluate(False)
661 
662  def addCompleteNonBondedRestraints(self):
663  leaves = IMP.atom.get_leaves(self.protein)
664  counter = 0
665 
666  for i in range(len(leaves)):
667  iLeaf = leaves[i]
668  for j in range(i + 1, len(leaves)):
669  jLeaf = leaves[j]
670 
671  if ((self.quickParticleName(jLeaf) in self.nonBondedPairs[self.quickParticleName(iLeaf)]) == 1):
672  continue
673  if (self.skipNonBondedPair([iLeaf, jLeaf]) == 1):
674  continue
675 
676  self.createDopeRestraint([iLeaf, jLeaf])
677  counter += 1
678  # print "added complete restraint between %s and %s" %
679  # (self.quickParticleName(iLeaf),
680  # self.quickParticleName(jLeaf))
681  print("added %s restraints for %s particles" % (counter, len(leaves)))
682  self.model.evaluate(False)
683 
684  # lots of output on all restraints
685  def writeAllRestraints(self):
686  print("write all restraints:")
687  for r in IMP.get_restraints([self.model.get_root_restraint_set()]):
688  print(self.quickRestraintName(r))
689 
690  # Read parameterss with md temperature / step schedule and save them
691  def initializeMdSchedule(self):
692  schedule = self.getParam("md_schedule")
693  stages = schedule.split()
694  totalSteps = 0
695  for stage in stages:
696  print("unpacking stage %s" % stage)
697  [steps, temperature, excludedVolume] = stage.split('_')
698  totalSteps += int(steps)
699  self.totalMdSteps = totalSteps
700  self.mdStages = stages
701 
702  # Creates roothandle for hdf5 objects interacting with md trajectory
703  # Either we create a new trajectory in this file and read it back in the
704  # same run, or we are reading a saved trajectory
705  def createMdRootHandle(self):
706 
707  mdFileName = ""
708  outputDir = self.getParam("output_directory")
709 
710  if (self.getParam("read_assignments_from_file") == "no"):
711  # create new hdf5 file to which md output will be written
712  mdFileName = self.getParam("md_trajectory_output_file")
713  fileName = os.path.join(outputDir, mdFileName)
714  print("creating rmf file with filename %s" % fileName)
715  rh = RMF.create_rmf_file(fileName)
716  my_kc = rh.add_category("my data")
717  IMP.rmf.add_hierarchy(rh, self.protein)
718 
719  self.rootHandle = rh
720  else:
721  # use existing hdf5 file
722  mdFileName = self.getParam("saved_md_filename")
723 
724  rh = RMF.open_rmf_file(mdFileName)
725  IMP.rmf.set_hierarchies(rh, [self.protein])
726 
727  self.rootHandle = rh
728 
729  def evaluateModel(self):
730  return self.model.evaluate(False)
731 
732  def loadMdHelpers(self):
733 
734  self.createMdRootHandle()
735 
736  self.createMdOptimizer()
737 
738  self.addMdOptimizerStates()
739 
740  self.initializeMdSchedule()
741 
742  self.createdMdOptimizer = 1
743 
744  # Create the optimizer object for running MD
745  def createMdOptimizer(self):
746  print("creating md object")
747  md = IMP.atom.MolecularDynamics(self.model)
748  # hack to get around bug where vx attributes in non-optimized particles
749  # are not being set
750  md.optimize(1)
751  md.set_velocity_cap(100.0)
752  self.mdOptimizer = md
753 
754  def addMdOptimizerStates(self):
755 
756  # VelocityScalingOptimizerState
758  IMP.atom.get_leaves(self.protein),
759  0) # temperature is set later
760  vsIndex = self.mdOptimizer.add_optimizer_state(vsos)
761 
762  # SaveHierarchyConfigurationOptimizerState
763  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState(
764  [self.protein], self.rootHandle)
765  self.mdOptimizer.add_optimizer_state(hdos)
766 
767  self.hdos = hdos
768  self.vsos = vsos
769 
770  def readTrajectories(self):
771 
772  cgFileName = self.getParam("cg_output_file")
773  bestCgScore = 10000000
774  bestCgScoreFile = ""
775  bestCgRmsd = 10000000
776  bestCgRmsdFile = ""
777 
778  outputDir = self.getParam("output_directory")
779  trajectoryFile = self.getParam("md_trajectory_output_file")
780  fullFile = os.path.join(outputDir, trajectoryFile)
781  rh = RMF.open_rmf_file(fullFile)
782  IMP.rmf.set_hierarchies(rh, [self.protein])
783  framesToRead = atomicDominoUtilities.getMdIntervalFrames(
784  rh, int(self.getParam("cg_interval")), self.protein)
785 
786  if (len(framesToRead) > 0):
787  for cgNumber in framesToRead:
788  # Open next cg trajectory
789  outputDir = self.getParam("output_directory")
790  fullCgFileName = os.path.join(
791  outputDir, "%s%s" %
792  (cgFileName, cgNumber))
793  rh = RMF.open_rmf_file(fullCgFileName)
794  IMP.rmf.set_hierarchies(rh, [self.protein])
795 
796  frameCount = IMP.rmf.get_number_of_frames(rh, self.protein)
797  IMP.rmf.load_frame(rh, frameCount - 1, self.protein)
798  score = self.model.evaluate(False)
799  rmsd = self.calculateNativeRmsd(self.flexibleAtoms)
800  print("cg number %s has score %s rmsd %s" % (cgNumber, score, rmsd))
801  if (score < bestCgScore):
802  bestCgScore = score
803  bestCgScoreFile = fullCgFileName
804  if (rmsd < bestCgRmsd):
805  bestCgRmsd = rmsd
806  bestCgRmsdFile = fullCgFileName
807 
808  # output best score information
809  self.singlePdbResults(
810  bestCgScoreFile,
811  -1,
812  self.getParam("best_cg_score_output_file"))
813  self.singlePdbResults(
814  bestCgRmsdFile,
815  -1,
816  self.getParam("best_cg_rmsd_output_file"))
817  self.singlePdbResults(
818  "%s%s" %
819  (cgFileName, framesToRead[-1]), -1, self.getParam("final_cg_frame_output_file"))
820  finalCgRmsd = self.calculateNativeRmsd(self.flexibleAtoms)
821  self.bestCgScore = bestCgScore
822  self.bestCgRmsd = bestCgRmsd
823  self.finalCgRmsd = finalCgRmsd
824 
825  def singlePdbResults(self, trajectoryFile, frame, outputPdbFile):
826 
827  fullTrajectoryFile = os.path.join(
828  self.getParam("output_directory"),
829  trajectoryFile)
830  fullOutputFile = os.path.join(
831  self.getParam("output_directory"),
832  outputPdbFile)
833  rh = RMF.open_rmf_file(fullTrajectoryFile)
834  IMP.rmf.set_hierarchies(rh, [self.protein])
835  if (frame == -1):
836  frame = IMP.rmf.get_number_of_frames(rh, self.protein) - 1
837  IMP.rmf.load_frame(rh, frame, self.protein)
838  IMP.atom.write_pdb(self.protein, fullOutputFile)
839 
840  def calculateRmsd(self, otherProtein, flexibleAtoms):
841  otherNamesToParticles = atomicDominoUtilities.makeNamesToParticles(
842  otherProtein)
843  otherVector = []
844  modelVector = []
845  for pName in otherNamesToParticles.keys():
846  if ((pName in flexibleAtoms) == 0):
847  continue
848  otherParticle = otherNamesToParticles[pName]
849  modelParticle = self.namesToParticles[pName]
850  otherVector.append(
851  IMP.core.XYZ(otherParticle).get_coordinates())
852  modelVector.append(
853  IMP.core.XYZ(modelParticle).get_coordinates())
854  rmsd = IMP.atom.get_rmsd(otherVector, modelVector)
855  return rmsd
856 
857  def calculateNativeRmsd(self, flexibleAtoms):
858 
859  if (self.wroteNativeProtein == 0):
860  pdbName = self.getParam("native_pdb_input_file")
861  self.nativeModel = IMP.kernel.Model()
862  self.nativeProtein = IMP.atom.read_pdb(
863  pdbName,
864  self.nativeModel,
866  self.wroteNativeProtein = 1
867 
868  return self.calculateRmsd(self.nativeProtein, flexibleAtoms)
869 
870  # Run MD according to the temperature schedule read in from parameters
871  def runMolecularDynamics(self):
872  self.initializedFlexFlexLjRestraints = 0
873  if (self.fixedNonFlexibleAtoms == 0):
874  print("error: before running md, must fix non flexible atoms")
875  sys.exit()
876  stepsSoFar = 0
877  for stage in self.mdStages:
878  [steps, temperature, excludedVolume] = stage.split('_')
879  stepsSoFar += int(steps)
880  self.vsos.set_temperature(int(temperature))
881  if (excludedVolume == "1"): # add lennard jones restraints between flexible atoms and fixed atoms (don't add between flex atoms)
882  self.addFixedFlexibleLjRestraints()
883  elif (excludedVolume != "0"):
884  # add lennard jones restraints between flexible atoms
885  if (self.getParam("excluded_volume_type") == "lj"):
886  self.addFlexFlexLjRestraints(float(excludedVolume))
887  elif (self.getParam("excluded_volume_type") == "ev"):
888  self.addExcludedVolumeRestraints(
889  ) # add full excluded volume restraints (just take everything in the protein)
890  else:
891  "please set excluded_volume_type to either lj or ev"
892  sys.exit()
893  print("running md at temperature %s for %s steps" % (temperature, steps))
894  self.mdOptimizer.optimize(int(steps))
895 
897  self.protein,
898  os.path.join(
899  self.getParam(
900  "output_directory"),
901  "md_after_%s.pdb" %
902  stepsSoFar))
903  print("model score after stage %s is %s" % (stage, self.model.evaluate(False)))
904  print("done running md")
905 
906  def runAllCg(self):
907  cgInterval = int(self.getParam("cg_interval"))
908  outputDir = self.getParam("output_directory")
909  trajectoryFile = self.getParam("md_trajectory_output_file")
910  fullFile = os.path.join(outputDir, trajectoryFile)
911  print("open rmf %s" % fullFile)
912  rh = RMF.open_rmf_file(fullFile)
913  IMP.rmf.set_hierarchies(rh, [self.protein])
914  framesToRead = atomicDominoUtilities.getMdIntervalFrames(
915  rh, cgInterval, self.protein)
916 
917  for i in framesToRead:
918  cgFileName = "%s%s" % (self.getParam("cg_output_file"), i)
919  self.applyCg(i, cgFileName)
920 
921  # Apply CG to each step of the saved MD trajectory. Writes results to same
922  # file storing the MD trajectory
923  def applyCg(self, mdStepStart, cgFileName):
924 
925  cgSteps = int(self.getParam("cg_steps"))
926 
927  # load md step specified by calling object
928  print("apply cg: loading md frame for cg step start %s" % mdStepStart)
929  IMP.rmf.load_frame(self.rootHandle, mdStepStart, self.protein)
930 
931  # create new hdf5 file to which cg output will be written
932  outputDir = self.getParam("output_directory")
933  fileName = os.path.join(outputDir, cgFileName)
934  print("creating cg hdf5 file %s" % fileName)
935  rh = RMF.create_rmf_file(fileName)
936  my_kc = rh.add_category("my data")
937  IMP.rmf.add_hierarchy(rh, self.protein)
938 
939  # apply cg
940  cg = IMP.core.ConjugateGradients(self.model)
941  firstScore = self.model.evaluate(False)
942 
943  print("creating optimizer state")
944  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState(
945  [self.protein], rh)
946  hdos.set_skip_steps(0)
947  # hdos is the optimizer state writing configurations to disk
948  cg.add_optimizer_state(hdos)
949 
950  print("running cg")
951  cg.optimize(cgSteps)
952  secondScore = self.model.evaluate(False)
953  print("cg score after md step %s before %s after %s" % (mdStepStart, firstScore, secondScore))
954  return secondScore
955 
956  # Tell the optimizer not to move atoms we have determined ar fixed
957  def fixNonFlexibleAtoms(self):
958  if (self.createdMdOptimizer == 0):
959  self.createMdOptimizer()
960  # print "ERROR: must create md optimizer and optimize once before set_particles_are_optimized is called"
961  # sys.exit()
962 
963  self.fixedNonFlexibleAtoms = 1
964  leaves = IMP.atom.get_leaves(self.protein)
965  for leaf in leaves:
966  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 0):
967  xyzDecorator = IMP.core.XYZ(leaf)
968  xyzDecorator.set_coordinates_are_optimized(0)
969 
970  def writeOutput(self):
971  restraintCount = self.model.get_number_of_restraints()
972  leaves = IMP.atom.get_leaves(self.protein)
973  flexiblePeptideAtoms = 0
974  flexibleProteinAtoms = 0
975  fixedPeptideAtoms = 0
976  fixedProteinAtoms = 0
977 
978  for leaf in leaves:
979  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 0):
980  if (self.getChain(leaf) == self.getParam("peptide_chain")):
981  fixedPeptideAtoms += 1
982  else:
983  fixedProteinAtoms += 1
984  else:
985  if (self.getChain(leaf) == self.getParam("peptide_chain")):
986  flexiblePeptideAtoms += 1
987  else:
988  flexibleProteinAtoms += 1
989  print("Flexible peptide atoms: %s" % flexiblePeptideAtoms)
990  print("Fixed peptide atoms: %s" % fixedPeptideAtoms)
991  print("Flexible protein atoms: %s" % flexibleProteinAtoms)
992  print("Fixed protein atoms: %s" % fixedProteinAtoms)
993  print("Total number of restraints: %s" % restraintCount)
994 
995  def writeOsOutput(self):
996 
997  print("Best cg score: %s" % self.bestCgScore)
998  print("best cg rmsd: %s" % self.bestCgRmsd)
999  print("final cg rmsd: %s" % self.finalCgRmsd)
Applies a SingletonScore to each Singleton in a list.
Chain get_chain(Hierarchy h)
void write_pdb(const Selection &mhd, base::TextOutput out, unsigned int model=1)
Score the angle based on a UnaryFunction,.
Simple conjugate gradients optimizer.
algebra::BoundingBoxD< 3 > get_bounding_box(const Hierarchy &h)
Get a bounding box for the Hierarchy.
void load_frame(RMF::FileConstHandle file, unsigned int frame)
Definition: frames.h:27
Hierarchy get_residue(Hierarchy mhd, unsigned int index)
Get the residue with the specified index.
Return all close unordered pairs of particles taken from the SingletonContainer.
double get_rmsd(const core::XYZs &s0, const core::XYZs &s1, const IMP::algebra::Transformation3D &tr_for_second)
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:87
Vector3D get_random_vector_in(const Cylinder3D &c)
Generate a random vector in a cylinder with uniform density.
CHARMM force field parameters.
static Float get_k_from_standard_deviation(Float sd, Float t=297.15)
Return the k to use for a given Gaussian standard deviation.
Definition: core/Harmonic.h:62
Maintains temperature during molecular dynamics by velocity scaling.
Select all non-alternative ATOM records.
Definition: pdb.h:63
void add_hierarchy(RMF::FileHandle fh, atom::Hierarchy hs)
Lennard-Jones score between a pair of particles.
Simple molecular dynamics optimizer.
Store a kernel::ParticleIndexes.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
algebra::BoundingBoxD< 3 > get_bounding_box(const XYZRs &ps)
Get the bounding box.
Score the improper dihedral based on a UnaryFunction,.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Score the bond based on a UnaryFunction,.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Prevent a set of particles and rigid bodies from inter-penetrating.
void add_dope_score_data(atom::Hierarchy h)
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
Definition: VectorD.h:209
Applies a PairScore to a Pair.
Definition: PairRestraint.h:29
std::string get_data_path(std::string file_name)
Return the full path to installed data.
Smooth interaction scores by switching the derivatives (force switch).
void read_pdb(base::TextInput input, int model, Hierarchy h)
Hierarchies get_leaves(const Selection &h)
Support for the RMF file format for storing hierarchical molecular data and markup.
Divide-and-conquer inferential optimization in discrete space.
Class for storing model, its restraints, constraints, and particles.
Definition: kernel/Model.h:73
Harmonic function (symmetric about the mean)
Definition: core/Harmonic.h:24
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27