IMP logo
IMP Reference Guide  develop.63b38c487d,2024/12/22
The Integrative Modeling Platform
peptideDocker.py
1 import IMP
2 import subprocess
3 import random
4 import IMP.domino
5 import IMP.core
6 import IMP.rmf
7 import RMF
8 
9 import time
10 import IMP.algebra
11 import sys
12 import os
13 from . import atomicDominoUtilities
14 
15 
16 class PeptideDocker:
17 
18  def __init__(self, parameterFileName):
19  self.createdMdOptimizer = 0
20  self.fixedNonFlexibleAtoms = 0
21  self.readParameters(parameterFileName)
22  self.wroteNativeProtein = 0
23 
24  def loadHelpers(self):
25  self.loadDockingHelpers()
26  self.loadMdHelpers()
27 
28  def readParameters(self, parameterFileName):
29  self.parameters = atomicDominoUtilities.readParameters(
30  parameterFileName)
31 
32  # Processing specific to peptide docking and this script
33  def loadDockingHelpers(self):
34  mkdirProcess = subprocess.Popen(
35  ['mkdir',
36  '-p',
37  self.getParam('output_directory')],
38  shell=False,
39  stderr=subprocess.PIPE)
40  _ = mkdirProcess.communicate()
41 
42  self.namesToParticles = atomicDominoUtilities.makeNamesToParticles(
43  self.protein)
44 
45  self.times = []
46  self.rawTimes = []
47 
48  # Helper method; create dictionary where the keys are names of all
49  # particles in the system and the values are themselves dictionaries
50  def initializeParticleNameDict(self, leaves):
51  particleDict = {}
52 
53  for leaf in leaves:
54  name = self.quickParticleName(leaf)
55  particleDict[name] = {}
56  return particleDict
57 
58  def getModel(self):
59  return self.model
60 
61  def getProtein(self):
62  return self.protein
63 
64  def createModel(self):
65 
66  pdbName = self.getParam("native_pdb_input_file")
67 
68  self.model = IMP.Model()
69 
70  self.protein = IMP.atom.read_pdb(
71  pdbName,
72  self.model,
74 
75  # atom radii, forcefield topology, etc. -- no restraints created here
76 
78  IMP.atom.get_data_path("par.lib"))
79 
80  topology = ff.create_topology(self.protein)
81  topology.apply_default_patches()
82 
83  topology.add_atom_types(self.protein)
84 
85  ff.add_radii(self.protein)
86  ff.add_well_depths(self.protein)
87  self.ff = ff
88  self.topology = topology
89 
90  # Get parameter value. Throws dictionary exception if not found
91  def getParam(self, paramName):
92  paramValue = self.parameters[paramName]
93  return paramValue
94 
95  # Peptide-docking friendly way to log which restraint we're dealing with
96  def quickRestraintName(self, r):
97  nameList = []
98  ps = r.get_input_particles()
99 
100  for p in ps:
101  if (self.isAtomParticle(p) == 0):
102  continue
103  isFlexible = 0
104  if ((self.quickParticleName(p) in self.flexibleAtoms) == 1):
105  isFlexible = 1
106  nameList.append("%s_%s" % (self.quickParticleName(p), isFlexible))
107 
108  return "%s acting on %s" % (r.get_name(), " , ".join(nameList))
109 
110  def quickParticleName(self, particle):
111  return atomicDominoUtilities.quickParticleName(particle)
112 
113  # output current time
114  def logTime(self, label):
115  nextTime = time.asctime(time.localtime(time.time()))
116  print("%s:\t %s" % (label, nextTime))
117  nextTimeEntry = [label, nextTime]
118  self.times.append(nextTimeEntry)
119  self.rawTimes.append(time.time())
120 
121  # output all times that were saved by logTime()
122  def outputTimes(self):
123  print("Time for each step:")
124  for timeEntry in self.times:
125  label = timeEntry[0]
126  value = timeEntry[1]
127  print("%s:\t%s" % (label, value))
128 
129  def getFlexibleAtoms(self):
130  return self.flexibleAtoms
131 
132  # Helper method to quickly get the chain identifier for a particle
133  def getChain(self, particle):
134  atomDecorator = IMP.atom.Atom(particle)
135  residue = IMP.atom.get_residue(atomDecorator)
136  chain = IMP.atom.get_chain(residue)
137  chainId = chain.get_id()
138  return chainId
139 
140  # Helper method to quickly get the residue index for a particle
141  def getResidue(self, particle):
142  atomDecorator = IMP.atom.Atom(particle)
143  residue = IMP.atom.get_residue(atomDecorator)
144  residueNumber = residue.get_index()
145  return residueNumber
146 
147  def isAtomParticle(self, p):
148  return atomicDominoUtilities.isAtomParticle(p)
149 
150  # Read all particles in restraint; return 1 if all are flexible and 0 if
151  # at least one atom is fixed
152  def allFlexibleAtoms(self, r):
153  ps = r.get_input_particles()
154  for p in ps:
155  if (self.isAtomParticle(p) == 0):
156  continue
157  if ((self.quickParticleName(p) in self.flexibleAtoms) == 0):
158  return 0
159  return 1
160 
161  # Check if we should skip processing of this pair of particles. Don't add
162  # restraints between particles that aren't optimized,
163  # and don't add restraints between particles on the same residue (for now
164  # assume stereochemistry is good enough for this)
165  def skipNonBondedPair(self, pair):
166 
167  # skip if same residue
168  if (self.getResidue(pair[0]) == self.getResidue(pair[1])):
169  return 1
170 
171  # skip if neither is flexible
172  if ((self.quickParticleName(pair[0]) in self.flexibleAtoms) == 0
173  and (self.quickParticleName(pair[1])
174  in self.flexibleAtoms) == 0):
175  return 1
176  return 0
177 
178  # Set up degrees of freedom for the system. Either we are dealing with
179  # a fixed protein or a flexible one.
180  # Determine which atoms are flexible and save them in self.flexibleAtoms
181  def initDof(self):
182 
183  peptideChain = self.getParam("peptide_chain")
184  self.flexibleAtoms = {}
185 
186  if (self.getParam("flexible_protein") == "yes"):
187  # All atoms within range of the peptide are flexible; range defined
188  # by close_pair_distance param
189  particlePairs = self.getClosePairs()
190 
191  for pair in particlePairs:
192  p0 = pair[0]
193  p1 = pair[1]
194  p0Chain = self.getChain(p0)
195  p1Chain = self.getChain(p1)
196  if (p0Chain == peptideChain or p1Chain == peptideChain):
197  self.flexibleAtoms[self.quickParticleName(p0)] = 1
198  self.flexibleAtoms[self.quickParticleName(p1)] = 1
199 
200  else:
201  # only peptide atoms are flexible, determined by the chain id
202  leaves = IMP.atom.get_leaves(self.protein)
203  for leaf in leaves:
204  if (self.getChain(leaf) == peptideChain):
205  self.flexibleAtoms[self.quickParticleName(leaf)] = 1
206  self.fixNonFlexibleAtoms()
207 
208  # return a dictionary where the keys are the names of the fixed atoms in
209  # the system
210  def getFixedAtoms(self):
211  leaves = IMP.atom.get_leaves(self.protein)
212  fixedAtoms = {}
213  for leaf in leaves:
214  leafName = self.quickParticleName(leaf)
215  if (leafName in self.flexibleAtoms) == 0:
216  fixedAtoms[leafName] = 1
217  return fixedAtoms
218 
219  # for each flexible atom, give it the coordinates of another flexible
220  # atom (ensures that each atom starts out in the binding site)
221  # Although it does guarantee a very random initial configuration, it is
222  # cheating since we are using knowledge of the native state
223  def initializeRandomExisting(self):
224  leaves = IMP.atom.get_leaves(self.protein)
225  particlesToVary = []
226  xyzs = []
227  for leaf in leaves:
228  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 1):
229  particlesToVary.append(self.quickParticleName(leaf))
230  atomToCoordinates = {}
231  for pName in particlesToVary:
232  randomPname = random.choice(particlesToVary)
233  randomP = self.namesToParticles[randomPname]
234  print("setting coordinates for particle %s to the ones currently "
235  "set for particle %s" % (pName, randomPname))
236  atomToCoordinates[pName] = IMP.core.XYZ(randomP).get_coordinates()
237 
238  for pName in particlesToVary:
239  particle = self.namesToParticles[pName]
240  xyz = IMP.core.XYZ(particle)
241  xyz.set_coordinates(atomToCoordinates[pName])
242  xyzs.append(xyz)
243  return xyzs
244 
245  # Set initial positions of flexible atoms in the system. We are
246  # experimenting with the best way to do this
247  def setInitialPositions(self):
248  initialPositionMethod = self.getParam("initial_position_method")
249 
250  # set random coordinates for atom by creating bounding box around
251  # native coordinates, increasing it by defined offset plus random
252  # number (necessary when running on cluster)
253  if (initialPositionMethod == "random"):
254 
255  myRandom = random.random()
256  initialOffset = float(
257  self.getParam("initial_atom_offset")) + myRandom
258  for particleName in self.flexibleAtoms.keys():
259  p = self.namesToParticles[particleName]
260  xyzDecorator = IMP.core.XYZ(p)
261  coordinates = xyzDecorator.get_coordinates()
262  bb = IMP.algebra.BoundingBox3D(coordinates)
263  bb += initialOffset
264  bb += myRandom
265 
266  randomXyz = IMP.algebra.get_random_vector_in(bb)
267  xyzDecorator.set_coordinates(randomXyz)
268 
269  # for each flexible atom, give it the coordinates of another flexible
270  # atom (ensures that each atom starts out in the binding site)
271  elif (initialPositionMethod == "random_existing"):
272  print("start random existing")
273  xyzs = self.initializeRandomExisting()
274 
275  # same as random_existing except offset each atom a little (doesn't
276  # seem to make too much of a difference)
277  elif (initialPositionMethod == "random_existing_box"):
278  xyzs = self.initializeRandomExisting()
279  initialOffset = float(self.getParam("initial_atom_offset"))
280  for xyzDecorator in xyzs:
281  coordinates = xyzDecorator.get_coordinates()
282  bb = IMP.algebra.BoundingBox3D(coordinates)
283  bb += initialOffset
284 
285  randomXyz = IMP.algebra.get_random_vector_in(bb)
286  xyzDecorator.set_coordinates(randomXyz)
287 
288  elif (initialPositionMethod == "random_box"):
289 
290  # Get bounding box containing all particles within close pair
291  # distance of flexible atoms
292  # This doesn't work perfectly; can put initial positions in the
293  # fixed atoms accidentally
294  # As constructed it is specific to peptide atoms only and not
295  # flexible protein atoms
296  bsParticles = []
297 
298  for firstPname in self.nonBondedPairs.keys():
299  secondPnames = self.nonBondedPairs[firstPname]
300  for secondPname in secondPnames.keys():
301  if ((secondPname in self.flexibleAtoms) == 0):
302  fixedAtom = self.namesToParticles[firstPname]
303 
304  bsParticles.append(IMP.core.XYZ(fixedAtom))
305  print("added next close atom %s to bounding box "
306  "particles" % firstPname)
307  # bsBoundingBox = IMP.core.get_bounding_box(bsParticles) -- revert
308  # if I can figure out how
309  peptideParticles = []
310  leaves = IMP.atom.get_leaves(self.protein)
311  for leaf in leaves:
312  if (self.getChain(leaf) == self.getParam("peptide_chain")):
313  peptideParticles.append(IMP.core.XYZ(leaf))
314  bsBoundingBox = IMP.core.get_bounding_box(peptideParticles)
315  # set initial position for each flexible atoms, and adjust it if it
316  # is too close to a fixed residue
317  fixedAtoms = self.getFixedAtoms()
318  for flexPname in self.flexibleAtoms.keys():
319  goodPosition = 0
320  flexParticle = self.namesToParticles[flexPname]
321  flexXyzDecorator = IMP.core.XYZ(flexParticle)
322  print("processing position for pname %s" % flexPname)
323  while goodPosition == 0:
324  goodPosition = 1
325  flexXyzDecorator.set_coordinates(
326  IMP.algebra.get_random_vector_in(bsBoundingBox))
327  for fixedPname in fixedAtoms.keys():
328  fixedParticle = self.namesToParticles[fixedPname]
329  distance = IMP.algebra.get_distance(
330  IMP.core.XYZ(fixedParticle).get_coordinates(),
331  IMP.core.XYZ(flexParticle).get_coordinates())
332  if (distance < 2):
333  print("finding new position for %s" % flexPname)
334  goodPosition = 0
335  # start the while loop over which will set new
336  # coordinates
337  break
338 
339  # take a box around the whole protein and randomly initialize atoms
340  # here
341  elif (initialPositionMethod == "random_full"):
342  bb = IMP.atom.get_bounding_box(self.protein)
343  for particleName in self.flexibleAtoms.keys():
344  print("creating random position for particle %s"
345  % particleName)
346  p = self.namesToParticles[particleName]
347  xyzDecorator = IMP.core.XYZ(p)
348  randomXyz = IMP.algebra.get_random_vector_in(bb)
349  myRandom = random.random()
350  bb += myRandom
351  xyzDecorator.set_coordinates(randomXyz)
352 
353  # Read in a file generated from a previous run (fastest way is to
354  # create a new model and copy corresponding XYZ Decorators)
355  elif (initialPositionMethod == "file"):
356 
357  initialPositionFile = self.getParam("saved_initial_atom_positions")
358  print("reading initial positions from %s" % initialPositionFile)
359  initialModel = IMP.Model()
360  initialProtein = IMP.atom.read_pdb(
361  initialPositionFile,
362  initialModel,
364  initialLeaves = IMP.atom.get_leaves(initialProtein)
365  for initialLeaf in initialLeaves:
366  name = self.quickParticleName(initialLeaf)
367  if ((name in self.namesToParticles) == 1):
368  existingLeaf = self.namesToParticles[name]
369  existingLeafXyz = IMP.core.XYZ(existingLeaf)
370  initialLeafXyz = IMP.core.XYZ(initialLeaf)
371  existingLeafXyz.set_coordinates(
372  initialLeafXyz.get_coordinates())
373  else:
374  print("Read in initial positions from file %s but this "
375  "file contained particle %s which is not in current "
376  "model" % (initialPositionFile, name))
377  else:
378  print("Please specify valid initial position method "
379  "(random or file)\n")
380  sys.exit()
381 
382  # output initial positions
383  outputDir = self.getParam("output_directory")
384  initialAtomPositionFile = self.getParam(
385  "initial_atom_positions_output_file")
386  fullOutputFile = os.path.join(outputDir, initialAtomPositionFile)
387  print("writing output to file %s" % fullOutputFile)
388  IMP.atom.write_pdb(self.protein, fullOutputFile)
389 
390  # get initial score for model
391  initialScore = self.model.evaluate(False)
392  print("initial score for model is %s" % initialScore)
393 
394  # Get non-bonded pairs in the system in preparation for restraining them
395  def getNonBondedPairs(self):
396  nonBondedDefinition = self.getParam("non_bonded_definition")
397  particlePairs = []
398  if (nonBondedDefinition == "close_pairs"):
399  particlePairs = self.getClosePairs()
400  elif (nonBondedDefinition == "random"):
401  particlePairs = self.getRandomParticlePairs()
402 # elif (nonBondedDefinition == "manual"):
403 # particlePairs = self.readManualRestraints(nativeProtein, 6)
404  else:
405  print("Please specify valid non bonded pair definition")
406  sys.exit()
407  return particlePairs
408 
409  # Get 3D distance between the particles in the pair
410  def getXyzDistance(self, pair):
411  d0 = IMP.core.XYZ(pair[0])
412  d1 = IMP.core.XYZ(pair[1])
413  distance = IMP.core.get_distance(d0, d1)
414  return distance
415 
416  # Get all pairs of particles within the distance defined by
417  # close_pair_distance parameter
418  def getClosePairs(self):
419 
420  # Get a list of all atoms in the protein, and put it in a container
421  leaves = IMP.atom.get_leaves(self.protein)
423  closePairDistance = float(self.getParam("close_pair_distance"))
424  # sometimes closePairContainer returns particles > distanceCutoff, even
425  # with slack = 0
426  useHardCutoff = self.getParam("use_hard_distance_cutoff")
427 
428  nbl = IMP.container.ClosePairContainer(cont, closePairDistance, 0.0)
429 
430  self.model.evaluate(False)
431 
432  particlePairs = nbl.get_particle_pairs()
433  finalPairs = []
434  for pair in particlePairs:
435  distance = self.getXyzDistance(pair)
436 
437  if (useHardCutoff == "0"
438  or (useHardCutoff == "1"
439  and distance < closePairDistance)):
440  finalPairs.append([pair[0], pair[1]])
441 
442  return finalPairs
443 
444  # Add restraints for bond lengths, angles, dihedrals, and impropers
445  def addForceFieldRestraints(self):
446 
447  useForcefieldRestraints = self.getParam("use_forcefield_restraints")
448  if (useForcefieldRestraints == "1"):
449 
451  IMP.atom.get_data_path("top_heav.lib"),
452  IMP.atom.get_data_path("par.lib"))
453 
454  # Set up and evaluate the stereochemical part (bonds, angles,
455  # dihedrals, impropers) of the CHARMM forcefield
456  bonds = self.topology.add_bonds(self.protein)
457  angles = ff.create_angles(bonds)
458  dihedrals = ff.create_dihedrals(bonds)
459  impropers = self.topology.add_impropers(self.protein)
460 
461  # tracks which particles are together in forcefield restraints
462  # (useful for filtering excluded volume)
463  self.ffParticleGroups = self.initializeParticleNameDict(
464  IMP.atom.get_leaves(self.protein))
465 
466  self.createForceFieldRestraintsForModel(
467  bonds,
468  "bonds",
469  "Bond Restraints",
472  0,
473  1)))
474  self.createForceFieldRestraintsForModel(
475  angles,
476  "angles",
477  "Angle Restraints",
480  0,
481  1)))
482  self.createForceFieldRestraintsForModel(
483  dihedrals,
484  "dihedrals",
485  "Dihedral Restraints",
487  self.createForceFieldRestraintsForModel(
488  impropers,
489  "impropers",
490  "Improper Restraints",
493  0,
494  1)))
495 
496  else:
497  print("Skipping forcefield restraints")
498 
499  # Create one type of forcefield restraint. The restraint is initially
500  # across the full model and needs to be decomposed so it can be used
501  # in the restraint graph.
502  def createForceFieldRestraintsForModel(
503  self,
504  ffParticles,
505  containerName,
506  restraintName,
507  singletonScore):
508 
509  maxForceFieldScore = float(self.getParam("max_forcefield_score"))
510  cont = IMP.container.ListSingletonContainer(self.model, containerName)
511  cont.add_particles(ffParticles)
512  listBondRestraint = IMP.container.SingletonsRestraint(
513  singletonScore, cont, restraintName)
514 
515  # Restraint decomposition has been a little touchy; a couple
516  # workarounds contained
517  decomposedRestraintTemp = listBondRestraint.create_decomposition()
518  decomposedRestraints = IMP.RestraintSet.get_from(
519  decomposedRestraintTemp)
520 
521  # manually remove the full restraint after getting the decomposition
522  del listBondRestraint
523 
524  count = decomposedRestraints.get_number_of_restraints()
525  for i in range(count):
526 
527  r = decomposedRestraints.get_restraint(i)
528 
529  if (self.allFlexibleAtoms(r) == 1):
530  # add the decomposed restraint to the model
531  self.model.add_restraint(r)
532  r.set_maximum_score(maxForceFieldScore)
533  self.addParticlesToGroups(r)
534 
535  # Add all atoms in r to ffParticleGroups; tracks which atoms are
536  # restrained by forcefield terms so we don't restrain them in other
537  # methods (e.g. excluded volume) -- essentially a manual pair-filter
538  def addParticlesToGroups(self, r):
539  inputParticles = []
540  ps = r.get_input_particles()
541 
542  for p in ps:
543 
544  # skip particles that aren't actually atoms (e.g. bond particles)
545  if (self.isAtomParticle(p) == 0):
546  continue
547  inputParticles.append(p.get_name())
548 
549  for firstName in inputParticles:
550  for secondName in inputParticles:
551  if (firstName == secondName):
552  continue
553  self.ffParticleGroups[firstName][secondName] = 1
554 
555  # Add excluded volume restraints across close pairs. Still a work in
556  # progress as they have been slowing down the code quite a bit and
557  # we'll probably want to turn them on only at a defined time
558  def addExcludedVolumeRestraints(self):
559 
560  restrainedParticles = atomicDominoUtilities.getRestrainedParticles(
561  self.protein, self.model, self.namesToParticles)
562  lsc = IMP.container.ListSingletonContainer(restrainedParticles)
563 
564  evr = IMP.core.ExcludedVolumeRestraint(lsc, 1, 1)
565  self.model.add_restraint(evr)
566 
567  # Add lennard jones restraints between flexible atoms if they haven't been
568  # added already, and then scale radii of flexible atoms by input scaling
569  # factor
570  def addFlexFlexLjRestraints(self, scalingFactor):
571  if (self.initializedFlexFlexLjRestraints == 0):
572  print("adding initial lennard jones restraints between pairs "
573  "of flexible atoms")
574  self.initializedFlexFlexLjRestraints = 1
575  leaves = IMP.atom.get_leaves(self.protein)
576 
577  sf = IMP.atom.ForceSwitch(6.0, 7.0)
579  for i in range(len(leaves)):
580  iLeaf = leaves[i]
581  for j in range(i + 1, len(leaves)):
582  jLeaf = leaves[j]
583  if ((self.quickParticleName(iLeaf) in self.flexibleAtoms)
584  == 1 and
585  (self.quickParticleName(jLeaf) in self.flexibleAtoms)
586  == 1):
587  ljpr = IMP.core.PairRestraint(
588  ps, [iLeaf, jLeaf], "LennardJones %s_%s" %
589  (i, j))
590  self.model.add_restraint(ljpr)
591  print("scaling atomic radii by scaling factor %s" % scalingFactor)
592  # reset radii back to original in preparation for rescaling
593  self.ff.add_radii(self.protein)
594  for pName in self.flexibleAtoms.keys():
595  particle = self.namesToParticles[pName]
596  xyzr = IMP.core.XYZR(particle)
597  radius = xyzr.get_radius()
598  radius *= scalingFactor
599  xyzr.set_radius(radius)
600 
601  # add lennard-jones restraints between all fixed atoms and all flexible
602  # atoms
603  def addFixedFlexibleLjRestraints(self):
604  print("adding initial fixed flexible lj restraints")
605  fixedAtoms = self.getFixedAtoms()
606  sf = IMP.atom.ForceSwitch(6.0, 7.0)
608 
609  for flexPname in self.flexibleAtoms.keys():
610  flexParticle = self.namesToParticles[flexPname]
611  for fixedPname in fixedAtoms.keys():
612  fixedParticle = self.namesToParticles[fixedPname]
613  ljpr = IMP.core.PairRestraint(
614  ps, [fixedParticle, flexParticle], "LennardJones %s_%s" %
615  (flexPname, fixedPname))
616  self.model.add_restraint(ljpr)
617 
618  # actually do the work to create the DOPE restraint
619  def createDopeRestraint(self, pair):
620  q0 = self.quickParticleName(pair[0])
621  q1 = self.quickParticleName(pair[1])
622  # create restraint around this distance, add to model
623  prName = "NonBonded %s_%s" % (q0, q1)
624  pairRestraint = IMP.core.PairRestraint(
625  self.dopePairScore, pair, prName)
626 
627  # print "Added non-bonded restraint between %s and %s" % (q0, q1)
628  self.model.add_restraint(pairRestraint)
629  maxNonBondedScore = float(self.getParam("max_non_bonded_score"))
630  pairRestraint.set_maximum_score(maxNonBondedScore)
631  # initialScore = pairRestraint.evaluate(0)
632 
633  # add non-bonded restraints across close pairs. At least one atom in each
634  # pair should be flexible
635  def addClosePairNonBondedRestraints(self):
636  particlePairs = self.getNonBondedPairs()
637 
638  nonBondedRestraintType = self.getParam("non_bonded_restraint_type")
639  dopeCutoff = float(self.getParam("dope_cutoff"))
640 
641  # k = IMP.core.Harmonic.get_k_from_standard_deviation(1)
642 
643  ps = 0
644  if (nonBondedRestraintType == "dope"):
645  IMP.atom.add_dope_score_data(self.protein)
646  ps = IMP.atom.DopePairScore(dopeCutoff)
647  self.dopePairScore = ps
648  nonBondedCount = 0
649  self.nonBondedPairs = self.initializeParticleNameDict(
650  IMP.atom.get_leaves(self.protein))
651  for pair in particlePairs:
652 
653  if (self.skipNonBondedPair(pair) == 1):
654  continue
655 
656  self.createDopeRestraint(pair)
657 
658  nonBondedCount += 1
659  self.nonBondedPairs[self.quickParticleName(pair[0])][
660  self.quickParticleName(pair[0])] = 1
661  self.nonBondedPairs[self.quickParticleName(pair[1])][
662  self.quickParticleName(pair[1])] = 1
663 
664  print("Added %s non-bonded restraints" % nonBondedCount)
665  self.model.evaluate(False)
666 
667  def addCompleteNonBondedRestraints(self):
668  leaves = IMP.atom.get_leaves(self.protein)
669  counter = 0
670 
671  for i in range(len(leaves)):
672  iLeaf = leaves[i]
673  for j in range(i + 1, len(leaves)):
674  jLeaf = leaves[j]
675 
676  if ((self.quickParticleName(jLeaf)
677  in self.nonBondedPairs[self.quickParticleName(iLeaf)])
678  == 1):
679  continue
680  if (self.skipNonBondedPair([iLeaf, jLeaf]) == 1):
681  continue
682 
683  self.createDopeRestraint([iLeaf, jLeaf])
684  counter += 1
685  # print "added complete restraint between %s and %s" %
686  # (self.quickParticleName(iLeaf),
687  # self.quickParticleName(jLeaf))
688  print("added %s restraints for %s particles" % (counter, len(leaves)))
689  self.model.evaluate(False)
690 
691  # lots of output on all restraints
692  def writeAllRestraints(self):
693  print("write all restraints:")
694  for r in IMP.get_restraints([self.model.get_root_restraint_set()]):
695  print(self.quickRestraintName(r))
696 
697  # Read parameterss with md temperature / step schedule and save them
698  def initializeMdSchedule(self):
699  schedule = self.getParam("md_schedule")
700  stages = schedule.split()
701  totalSteps = 0
702  for stage in stages:
703  print("unpacking stage %s" % stage)
704  [steps, temperature, excludedVolume] = stage.split('_')
705  totalSteps += int(steps)
706  self.totalMdSteps = totalSteps
707  self.mdStages = stages
708 
709  # Creates roothandle for hdf5 objects interacting with md trajectory
710  # Either we create a new trajectory in this file and read it back in the
711  # same run, or we are reading a saved trajectory
712  def createMdRootHandle(self):
713 
714  mdFileName = ""
715  outputDir = self.getParam("output_directory")
716 
717  if (self.getParam("read_assignments_from_file") == "no"):
718  # create new hdf5 file to which md output will be written
719  mdFileName = self.getParam("md_trajectory_output_file")
720  fileName = os.path.join(outputDir, mdFileName)
721  print("creating rmf file with filename %s" % fileName)
722  rh = RMF.create_rmf_file(fileName)
723  # my_kc = rh.add_category("my data")
724  IMP.rmf.add_hierarchy(rh, self.protein)
725 
726  self.rootHandle = rh
727  else:
728  # use existing hdf5 file
729  mdFileName = self.getParam("saved_md_filename")
730 
731  rh = RMF.open_rmf_file(mdFileName)
732  IMP.rmf.set_hierarchies(rh, [self.protein])
733 
734  self.rootHandle = rh
735 
736  def evaluateModel(self):
737  return self.model.evaluate(False)
738 
739  def loadMdHelpers(self):
740 
741  self.createMdRootHandle()
742 
743  self.createMdOptimizer()
744 
745  self.addMdOptimizerStates()
746 
747  self.initializeMdSchedule()
748 
749  self.createdMdOptimizer = 1
750 
751  # Create the optimizer object for running MD
752  def createMdOptimizer(self):
753  print("creating md object")
754  md = IMP.atom.MolecularDynamics(self.model)
755  # hack to get around bug where vx attributes in non-optimized particles
756  # are not being set
757  md.optimize(1)
758  md.set_velocity_cap(100.0)
759  self.mdOptimizer = md
760 
761  def addMdOptimizerStates(self):
762 
763  # VelocityScalingOptimizerState
765  IMP.atom.get_leaves(self.protein),
766  0) # temperature is set later
767  # vsIndex = self.mdOptimizer.add_optimizer_state(vsos)
768 
769  # SaveHierarchyConfigurationOptimizerState
770  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState(
771  [self.protein], self.rootHandle)
772  self.mdOptimizer.add_optimizer_state(hdos)
773 
774  self.hdos = hdos
775  self.vsos = vsos
776 
777  def readTrajectories(self):
778 
779  cgFileName = self.getParam("cg_output_file")
780  bestCgScore = 10000000
781  bestCgScoreFile = ""
782  bestCgRmsd = 10000000
783  bestCgRmsdFile = ""
784 
785  outputDir = self.getParam("output_directory")
786  trajectoryFile = self.getParam("md_trajectory_output_file")
787  fullFile = os.path.join(outputDir, trajectoryFile)
788  rh = RMF.open_rmf_file(fullFile)
789  IMP.rmf.set_hierarchies(rh, [self.protein])
790  framesToRead = atomicDominoUtilities.getMdIntervalFrames(
791  rh, int(self.getParam("cg_interval")), self.protein)
792 
793  if (len(framesToRead) > 0):
794  for cgNumber in framesToRead:
795  # Open next cg trajectory
796  outputDir = self.getParam("output_directory")
797  fullCgFileName = os.path.join(
798  outputDir, "%s%s" %
799  (cgFileName, cgNumber))
800  rh = RMF.open_rmf_file(fullCgFileName)
801  IMP.rmf.set_hierarchies(rh, [self.protein])
802 
803  frameCount = IMP.rmf.get_number_of_frames(rh, self.protein)
804  IMP.rmf.load_frame(rh, frameCount - 1, self.protein)
805  score = self.model.evaluate(False)
806  rmsd = self.calculateNativeRmsd(self.flexibleAtoms)
807  print("cg number %s has score %s rmsd %s"
808  % (cgNumber, score, rmsd))
809  if (score < bestCgScore):
810  bestCgScore = score
811  bestCgScoreFile = fullCgFileName
812  if (rmsd < bestCgRmsd):
813  bestCgRmsd = rmsd
814  bestCgRmsdFile = fullCgFileName
815 
816  # output best score information
817  self.singlePdbResults(
818  bestCgScoreFile,
819  -1,
820  self.getParam("best_cg_score_output_file"))
821  self.singlePdbResults(
822  bestCgRmsdFile,
823  -1,
824  self.getParam("best_cg_rmsd_output_file"))
825  self.singlePdbResults(
826  "%s%s" % (cgFileName, framesToRead[-1]),
827  -1, self.getParam("final_cg_frame_output_file"))
828  finalCgRmsd = self.calculateNativeRmsd(self.flexibleAtoms)
829  self.bestCgScore = bestCgScore
830  self.bestCgRmsd = bestCgRmsd
831  self.finalCgRmsd = finalCgRmsd
832 
833  def singlePdbResults(self, trajectoryFile, frame, outputPdbFile):
834 
835  fullTrajectoryFile = os.path.join(
836  self.getParam("output_directory"),
837  trajectoryFile)
838  fullOutputFile = os.path.join(
839  self.getParam("output_directory"),
840  outputPdbFile)
841  rh = RMF.open_rmf_file(fullTrajectoryFile)
842  IMP.rmf.set_hierarchies(rh, [self.protein])
843  if (frame == -1):
844  frame = IMP.rmf.get_number_of_frames(rh, self.protein) - 1
845  IMP.rmf.load_frame(rh, frame, self.protein)
846  IMP.atom.write_pdb(self.protein, fullOutputFile)
847 
848  def calculateRmsd(self, otherProtein, flexibleAtoms):
849  otherNamesToParticles = atomicDominoUtilities.makeNamesToParticles(
850  otherProtein)
851  otherVector = []
852  modelVector = []
853  for pName in otherNamesToParticles.keys():
854  if ((pName in flexibleAtoms) == 0):
855  continue
856  otherParticle = otherNamesToParticles[pName]
857  modelParticle = self.namesToParticles[pName]
858  otherVector.append(
859  IMP.core.XYZ(otherParticle).get_coordinates())
860  modelVector.append(
861  IMP.core.XYZ(modelParticle).get_coordinates())
862  rmsd = IMP.atom.get_rmsd(otherVector, modelVector)
863  return rmsd
864 
865  def calculateNativeRmsd(self, flexibleAtoms):
866 
867  if (self.wroteNativeProtein == 0):
868  pdbName = self.getParam("native_pdb_input_file")
869  self.nativeModel = IMP.Model()
870  self.nativeProtein = IMP.atom.read_pdb(
871  pdbName,
872  self.nativeModel,
874  self.wroteNativeProtein = 1
875 
876  return self.calculateRmsd(self.nativeProtein, flexibleAtoms)
877 
878  # Run MD according to the temperature schedule read in from parameters
879  def runMolecularDynamics(self):
880  self.initializedFlexFlexLjRestraints = 0
881  if (self.fixedNonFlexibleAtoms == 0):
882  print("error: before running md, must fix non flexible atoms")
883  sys.exit()
884  stepsSoFar = 0
885  for stage in self.mdStages:
886  [steps, temperature, excludedVolume] = stage.split('_')
887  stepsSoFar += int(steps)
888  self.vsos.set_temperature(int(temperature))
889  # add lennard jones restraints between flexible atoms and
890  # fixed atoms (don't add between flex atoms)
891  if excludedVolume == "1":
892  self.addFixedFlexibleLjRestraints()
893  elif (excludedVolume != "0"):
894  # add lennard jones restraints between flexible atoms
895  if (self.getParam("excluded_volume_type") == "lj"):
896  self.addFlexFlexLjRestraints(float(excludedVolume))
897  elif (self.getParam("excluded_volume_type") == "ev"):
898  # add full excluded volume restraints (just take
899  # everything in the protein)
900  self.addExcludedVolumeRestraints()
901  else:
902  "please set excluded_volume_type to either lj or ev"
903  sys.exit()
904  print("running md at temperature %s for %s steps"
905  % (temperature, steps))
906  self.mdOptimizer.optimize(int(steps))
907 
909  self.protein,
910  os.path.join(
911  self.getParam(
912  "output_directory"),
913  "md_after_%s.pdb" %
914  stepsSoFar))
915  print("model score after stage %s is %s"
916  % (stage, self.model.evaluate(False)))
917  print("done running md")
918 
919  def runAllCg(self):
920  cgInterval = int(self.getParam("cg_interval"))
921  outputDir = self.getParam("output_directory")
922  trajectoryFile = self.getParam("md_trajectory_output_file")
923  fullFile = os.path.join(outputDir, trajectoryFile)
924  print("open rmf %s" % fullFile)
925  rh = RMF.open_rmf_file(fullFile)
926  IMP.rmf.set_hierarchies(rh, [self.protein])
927  framesToRead = atomicDominoUtilities.getMdIntervalFrames(
928  rh, cgInterval, self.protein)
929 
930  for i in framesToRead:
931  cgFileName = "%s%s" % (self.getParam("cg_output_file"), i)
932  self.applyCg(i, cgFileName)
933 
934  # Apply CG to each step of the saved MD trajectory. Writes results to same
935  # file storing the MD trajectory
936  def applyCg(self, mdStepStart, cgFileName):
937 
938  cgSteps = int(self.getParam("cg_steps"))
939 
940  # load md step specified by calling object
941  print("apply cg: loading md frame for cg step start %s" % mdStepStart)
942  IMP.rmf.load_frame(self.rootHandle, mdStepStart, self.protein)
943 
944  # create new hdf5 file to which cg output will be written
945  outputDir = self.getParam("output_directory")
946  fileName = os.path.join(outputDir, cgFileName)
947  print("creating cg hdf5 file %s" % fileName)
948  rh = RMF.create_rmf_file(fileName)
949  # my_kc = rh.add_category("my data")
950  IMP.rmf.add_hierarchy(rh, self.protein)
951 
952  # apply cg
953  cg = IMP.core.ConjugateGradients(self.model)
954  firstScore = self.model.evaluate(False)
955 
956  print("creating optimizer state")
957  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState(
958  [self.protein], rh)
959  hdos.set_skip_steps(0)
960  # hdos is the optimizer state writing configurations to disk
961  cg.add_optimizer_state(hdos)
962 
963  print("running cg")
964  cg.optimize(cgSteps)
965  secondScore = self.model.evaluate(False)
966  print("cg score after md step %s before %s after %s"
967  % (mdStepStart, firstScore, secondScore))
968  return secondScore
969 
970  # Tell the optimizer not to move atoms we have determined ar fixed
971  def fixNonFlexibleAtoms(self):
972  if (self.createdMdOptimizer == 0):
973  self.createMdOptimizer()
974  # print "ERROR: must create md optimizer and optimize once before
975  # set_particles_are_optimized is called"
976  # sys.exit()
977 
978  self.fixedNonFlexibleAtoms = 1
979  leaves = IMP.atom.get_leaves(self.protein)
980  for leaf in leaves:
981  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 0):
982  xyzDecorator = IMP.core.XYZ(leaf)
983  xyzDecorator.set_coordinates_are_optimized(0)
984 
985  def writeOutput(self):
986  leaves = IMP.atom.get_leaves(self.protein)
987  flexiblePeptideAtoms = 0
988  flexibleProteinAtoms = 0
989  fixedPeptideAtoms = 0
990  fixedProteinAtoms = 0
991 
992  for leaf in leaves:
993  if ((self.quickParticleName(leaf) in self.flexibleAtoms) == 0):
994  if (self.getChain(leaf) == self.getParam("peptide_chain")):
995  fixedPeptideAtoms += 1
996  else:
997  fixedProteinAtoms += 1
998  else:
999  if (self.getChain(leaf) == self.getParam("peptide_chain")):
1000  flexiblePeptideAtoms += 1
1001  else:
1002  flexibleProteinAtoms += 1
1003  print("Flexible peptide atoms: %s" % flexiblePeptideAtoms)
1004  print("Fixed peptide atoms: %s" % fixedPeptideAtoms)
1005  print("Flexible protein atoms: %s" % flexibleProteinAtoms)
1006  print("Fixed protein atoms: %s" % fixedProteinAtoms)
1007 
1008  def writeOsOutput(self):
1009 
1010  print("Best cg score: %s" % self.bestCgScore)
1011  print("best cg rmsd: %s" % self.bestCgRmsd)
1012  print("final cg rmsd: %s" % self.finalCgRmsd)
def __init__
Create Jeffreys prior.
Definition: parameters.py:43
Applies a SingletonScore to each Singleton in a list.
Chain get_chain(Hierarchy h)
Get the containing chain or Chain() if there is none.
Score the angle based on a UnaryFunction,.
Simple conjugate gradients optimizer.
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
Return all close unordered pairs of particles taken from the SingletonContainer.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
void read_pdb(TextInput input, int model, Hierarchy h)
Vector3D get_random_vector_in(const Cylinder3D &c)
Generate a random vector in a cylinder with uniform density.
CHARMM force field parameters.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Maintains temperature during molecular dynamics by velocity scaling.
Select all non-alternative ATOM records.
Definition: pdb.h:128
double get_rmsd(const Selection &s0, const Selection &s1)
void add_hierarchy(RMF::FileHandle fh, atom::Hierarchy hs)
Lennard-Jones score between a pair of particles.
Simple molecular dynamics simulator.
Store a list of ParticleIndexes.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:238
algebra::BoundingBoxD< 3 > get_bounding_box(const XYZRs &ps)
Get the bounding box.
Score the improper dihedral based on a UnaryFunction,.
RestraintsTemp get_restraints(It b, It e)
Definition: RestraintSet.h:132
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Score the bond based on a UnaryFunction,.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Prevent a set of particles and rigid bodies from inter-penetrating.
void add_dope_score_data(atom::Hierarchy h)
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
Definition: VectorD.h:196
Applies a PairScore to a Pair.
Definition: PairRestraint.h:31
std::string get_data_path(std::string file_name)
Return the full path to one of this module's data files.
Score pair of atoms based on DOPE.
Definition: DopePairScore.h:36
Smooth interaction scores by switching the derivatives (force switch).
Hierarchies get_leaves(const Selection &h)
Support for the RMF file format for storing hierarchical molecular data and markup.
Divide-and-conquer inferential optimization in discrete space.
Harmonic function (symmetric about the mean)
Definition: core/Harmonic.h:27
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27