IMP  2.1.0
The Integrative Modeling Platform
peptideDocker.py
1 import IMP
2 import subprocess
3 import random
4 import IMP.domino
5 import IMP.core
6 import IMP.rmf
7 import RMF
8 
9 import time
10 import IMP.algebra
11 import types
12 import re
13 import sys
14 import operator
15 import os
16 import atomicDominoUtilities
17 
18 class PeptideDocker:
19 
20  def __init__(self, parameterFileName):
21  self.createdMdOptimizer = 0
22  self.fixedNonFlexibleAtoms = 0
23  self.readParameters(parameterFileName)
24  self.wroteNativeProtein = 0
25 
26  def loadHelpers(self):
27  self.loadDockingHelpers()
28  self.loadMdHelpers()
29 
30  def readParameters(self, parameterFileName):
31  self.parameters = atomicDominoUtilities.readParameters(parameterFileName)
32 
33  #Processing specific to peptide docking and this script
34  def loadDockingHelpers(self):
35  outputDir = self.getParam("output_directory")
36  mkdirProcess = subprocess.Popen(['mkdir', '-p', self.getParam('output_directory')], shell=False, stderr=subprocess.PIPE)
37  output = mkdirProcess.communicate()
38 
39  self.namesToParticles = atomicDominoUtilities.makeNamesToParticles(self.protein)
40 
41  self.times = []
42  self.rawTimes = []
43 
44  #Helper method; create dictionary where the keys are names of all particles in the system and the values are themselves dictionaries
45  def initializeParticleNameDict(self, leaves):
46  particleDict = {}
47 
48  for leaf in leaves:
49  name = self.quickParticleName(leaf)
50  particleDict[name] = {}
51  return particleDict
52 
53  def getModel(self):
54  return self.model
55 
56  def getProtein(self):
57  return self.protein
58 
59  def createModel(self):
60 
61  pdbName = self.getParam("native_pdb_input_file")
62 
63  self.model = IMP.kernel.Model()
64 
65  self.protein = IMP.atom.read_pdb(pdbName, self.model, IMP.atom.ATOMPDBSelector())
66 
67  #atom radii, forcefield topology, etc. -- no restraints created here
68 
70  IMP.atom.get_data_path("par.lib"))
71 
72  topology = ff.create_topology(self.protein)
73  topology.apply_default_patches()
74 
75  topology.add_atom_types(self.protein)
76 
77  ff.add_radii(self.protein)
78  ff.add_well_depths(self.protein)
79  self.ff = ff
80  self.topology = topology
81 
82  #Get parameter value. Throws dictionary exception if not found
83  def getParam(self, paramName):
84  paramValue = self.parameters[paramName]
85  return paramValue
86 
87  #Peptide-docking friendly way to log which restraint we're dealing with
88  def quickRestraintName(self, r):
89  nameList = []
90  ps = r.get_input_particles()
91 
92  for p in ps:
93  if (self.isAtomParticle(p) == 0):
94  continue
95  isFlexible = 0
96  if (self.flexibleAtoms.has_key(self.quickParticleName(p)) == 1):
97  isFlexible = 1
98  nameList.append("%s_%s" % (self.quickParticleName(p), isFlexible))
99 
100  return "%s acting on %s" % (r.get_name(), " , ".join(nameList))
101 
102  def quickParticleName(self, particle):
103  return atomicDominoUtilities.quickParticleName(particle)
104 
105  #output current time
106  def logTime(self, label):
107  nextTime = time.asctime(time.localtime(time.time()))
108  print "%s:\t %s" % (label, nextTime)
109  nextTimeEntry = [label, nextTime]
110  self.times.append(nextTimeEntry)
111  self.rawTimes.append(time.time())
112 
113  #output all times that were saved by logTime()
114  def outputTimes(self):
115  print "Time for each step:"
116  for timeEntry in self.times:
117  label = timeEntry[0]
118  value = timeEntry[1]
119  print "%s:\t%s" % (label, value)
120 
121 
122  def getFlexibleAtoms(self):
123  return self.flexibleAtoms
124 
125  #Helper method to quickly get the chain identifier for a particle
126  def getChain(self, particle):
127  atomDecorator = IMP.atom.Atom.decorate_particle(particle)
128  residue = IMP.atom.get_residue(atomDecorator)
129  chain = IMP.atom.get_chain(residue)
130  chainId = chain.get_id()
131  return chainId
132 
133  #Helper method to quickly get the residue index for a particle
134  def getResidue(self, particle):
135  atomDecorator = IMP.atom.Atom.decorate_particle(particle)
136  residue = IMP.atom.get_residue(atomDecorator)
137  residueNumber = residue.get_index()
138  return residueNumber
139 
140  def isAtomParticle(self, p):
141  return atomicDominoUtilities.isAtomParticle(p)
142 
143  #Read all particles in restraint; return 1 if all are flexible and 0 if at least one atom is fixed
144  def allFlexibleAtoms(self, r):
145  ps = r.get_input_particles()
146  for p in ps:
147  if (self.isAtomParticle(p) == 0):
148  continue
149  if (self.flexibleAtoms.has_key(self.quickParticleName(p)) == 0):
150  return 0
151  return 1
152 
153  #Check if we should skip processing of this pair of particles. Don't add restraints between particles that aren't optimized,
154  #and don't add restraints between particles on the same residue (for now assume stereochemistry is good enough for this)
155  def skipNonBondedPair(self, pair):
156 
157  #skip if same residue
158  if (self.getResidue(pair[0]) == self.getResidue(pair[1])):
159  return 1
160 
161  #skip if neither is flexible
162  if (self.flexibleAtoms.has_key(self.quickParticleName(pair[0])) == 0 and self.flexibleAtoms.has_key(self.quickParticleName(pair[1])) == 0):
163  return 1
164  return 0
165 
166  #Set up degrees of freedom for the system. Either we are dealing with a fixed protein or a flexible one.
167  #Determine which atoms are flexible and save them in self.flexibleAtoms
168  def initDof(self):
169 
170  peptideChain = self.getParam("peptide_chain")
171  self.flexibleAtoms = {}
172 
173  if (self.getParam("flexible_protein") == "yes"):
174  #All atoms within range of the peptide are flexible; range defined by close_pair_distance param
175  particlePairs = self.getClosePairs()
176 
177  for pair in particlePairs:
178  p0 = pair[0]
179  p1 = pair[1]
180  p0Chain = self.getChain(p0)
181  p1Chain = self.getChain(p1)
182  if (p0Chain == peptideChain or p1Chain == peptideChain):
183  self.flexibleAtoms[self.quickParticleName(p0)] = 1
184  self.flexibleAtoms[self.quickParticleName(p1)] = 1
185 
186  else:
187  #only peptide atoms are flexible, determined by the chain id
188  leaves = IMP.atom.get_leaves(self.protein)
189  for leaf in leaves:
190  if (self.getChain(leaf) == peptideChain):
191  self.flexibleAtoms[self.quickParticleName(leaf)] = 1
192  self.fixNonFlexibleAtoms()
193 
194  #return a dictionary where the keys are the names of the fixed atoms in the system
195  def getFixedAtoms(self):
196  leaves = IMP.atom.get_leaves(self.protein)
197  fixedAtoms = {}
198  for leaf in leaves:
199  leafName = self.quickParticleName(leaf)
200  if self.flexibleAtoms.has_key(leafName) == 0:
201  fixedAtoms[leafName] = 1
202  return fixedAtoms
203 
204 
205  #for each flexible atom, give it the coordinates of another flexible atom (ensures that each atom starts out in the binding site)
206  #Although it does guarantee a very random initial configuration, it is cheating since we are using knowledge of the native state
207  def initializeRandomExisting(self):
208  leaves = IMP.atom.get_leaves(self.protein)
209  particlesToVary = []
210  xyzs = []
211  for leaf in leaves:
212  if (self.flexibleAtoms.has_key(self.quickParticleName(leaf)) == 1):
213  particlesToVary.append(self.quickParticleName(leaf))
214  atomToCoordinates = {}
215  for pName in particlesToVary:
216  randomPname = random.choice(particlesToVary)
217  randomP = self.namesToParticles[randomPname]
218  print "setting coordinates for particle %s to the ones currently set for particle %s" % (pName, randomPname)
219  atomToCoordinates[pName] = IMP.core.XYZ.decorate_particle(randomP).get_coordinates()
220 
221  for pName in particlesToVary:
222  particle = self.namesToParticles[pName]
223  xyz = IMP.core.XYZ.decorate_particle(particle)
224  xyz.set_coordinates(atomToCoordinates[pName])
225  xyzs.append(xyz)
226  return xyzs
227 
228 
229  #Set initial positions of flexible atoms in the system. We are experimenting with the best way to do this
230  def setInitialPositions(self):
231  initialPositionMethod = self.getParam("initial_position_method")
232 
233  #set random coordinates for atom by creating bounding box around native coordinates, increasing it
234  #by defined offset plus random number (necessary when running on cluster)
235  if (initialPositionMethod == "random"):
236 
237  myRandom = random.random()
238  initialOffset = float(self.getParam("initial_atom_offset")) + myRandom
239  for particleName in self.flexibleAtoms.keys():
240  p = self.namesToParticles[particleName]
241  xyzDecorator = IMP.core.XYZ.decorate_particle(p)
242  coordinates = xyzDecorator.get_coordinates()
243  bb = IMP.algebra.BoundingBox3D(coordinates)
244  bb += initialOffset
245  bb += myRandom
246 
247  randomXyz = IMP.algebra.get_random_vector_in(bb)
248  xyzDecorator.set_coordinates(randomXyz)
249 
250  #for each flexible atom, give it the coordinates of another flexible atom (ensures that each atom starts out in the binding site)
251  elif (initialPositionMethod == "random_existing"):
252  print "start random existing"
253  xyzs = self.initializeRandomExisting()
254 
255  #same as random_existing except offset each atom a little (doesn't seem to make too much of a difference)
256  elif (initialPositionMethod == "random_existing_box"):
257  xyzs = self.initializeRandomExisting()
258  initialOffset = float(self.getParam("initial_atom_offset"))
259  for xyzDecorator in xyzs:
260  coordinates = xyzDecorator.get_coordinates()
261  bb = IMP.algebra.BoundingBox3D(coordinates)
262  bb += initialOffset
263 
264  randomXyz = IMP.algebra.get_random_vector_in(bb)
265  xyzDecorator.set_coordinates(randomXyz)
266 
267  elif (initialPositionMethod == "random_box"):
268 
269  #Get bounding box containing all particles within close pair distance of flexible atoms
270  #This doesn't work perfectly; can put initial positions in the fixed atoms accidentally
271  #As constructed it is specific to peptide atoms only and not flexible protein atoms
272  bsParticles = []
273 
274  for firstPname in self.nonBondedPairs.keys():
275  secondPnames = self.nonBondedPairs[firstPname]
276  for secondPname in secondPnames.keys():
277  if (self.flexibleAtoms.has_key(secondPname) == 0):
278  fixedAtom = self.namesToParticles[firstPname]
279 
280  bsParticles.append(IMP.core.XYZ.decorate_particle(fixedAtom))
281  print "added next close atom %s to bounding box particles" % firstPname
282  #bsBoundingBox = IMP.core.get_bounding_box(bsParticles) -- revert if I can figure out how
283  peptideParticles = []
284  leaves = IMP.atom.get_leaves(self.protein)
285  for leaf in leaves:
286  if (self.getChain(leaf) == self.getParam("peptide_chain")):
287  peptideParticles.append(IMP.core.XYZ.decorate_particle(leaf))
288  bsBoundingBox = IMP.core.get_bounding_box(peptideParticles)
289  #set initial position for each flexible atoms, and adjust it if it is too close to a fixed residue
290  fixedAtoms = self.getFixedAtoms()
291  for flexPname in self.flexibleAtoms.keys():
292  goodPosition = 0
293  flexParticle = self.namesToParticles[flexPname]
294  flexXyzDecorator = IMP.core.XYZ.decorate_particle(flexParticle)
295  print "processing position for pname %s" % flexPname
296  while(goodPosition == 0):
297  goodPosition = 1
298  flexXyzDecorator.set_coordinates(IMP.algebra.get_random_vector_in(bsBoundingBox))
299  for fixedPname in fixedAtoms.keys():
300  fixedParticle = self.namesToParticles[fixedPname]
301  distance = IMP.algebra.get_distance(IMP.core.XYZ.decorate_particle(fixedParticle).get_coordinates(),
302  IMP.core.XYZ.decorate_particle(flexParticle).get_coordinates())
303  if (distance < 2):
304  print "finding new position for %s" % flexPname
305  goodPosition = 0
306  break #start the while loop over which will set new coordinates
307 
308 
309  #take a box around the whole protein and randomly initialize atoms here
310  elif (initialPositionMethod == "random_full"):
311  bb = IMP.atom.get_bounding_box(self.protein)
312  for particleName in self.flexibleAtoms.keys():
313  print "creating random position for particle %s" % particleName
314  p = self.namesToParticles[particleName]
315  xyzDecorator = IMP.core.XYZ.decorate_particle(p)
316  randomXyz = IMP.algebra.get_random_vector_in(bb)
317  myRandom = random.random()
318  bb += myRandom
319  xyzDecorator.set_coordinates(randomXyz)
320 
321 
322  #Read in a file generated from a previous run (fastest way is to create a new model and copy corresponding XYZ Decorators)
323  elif (initialPositionMethod == "file"):
324 
325  initialPositionFile = self.getParam("saved_initial_atom_positions")
326  print "reading initial positions from %s" % initialPositionFile
327  initialModel = IMP.kernel.Model()
328  initialProtein = IMP.atom.read_pdb(initialPositionFile, initialModel, IMP.atom.ATOMPDBSelector())
329  initialLeaves = IMP.atom.get_leaves(initialProtein)
330  for initialLeaf in initialLeaves:
331  name = self.quickParticleName(initialLeaf)
332  if (self.namesToParticles.has_key(name) == 1):
333  existingLeaf = self.namesToParticles[name]
334  existingLeafXyz = IMP.core.XYZ.decorate_particle(existingLeaf)
335  initialLeafXyz = IMP.core.XYZ.decorate_particle(initialLeaf)
336  existingLeafXyz.set_coordinates(initialLeafXyz.get_coordinates())
337  else:
338  print "Read in initial positions from file %s but this file contained particle %s which is not in current model" % (initialPositionFile, name)
339  else:
340  print "Please specify valid initial position method (random or file)\n"
341  sys.exit()
342 
343  #output initial positions
344  outputDir = self.getParam("output_directory")
345  initialAtomPositionFile = self.getParam("initial_atom_positions_output_file")
346  fullOutputFile = os.path.join(outputDir, initialAtomPositionFile)
347  print "writing output to file %s" % fullOutputFile
348  IMP.atom.write_pdb(self.protein, fullOutputFile)
349 
350  #get initial score for model
351  initialScore = self.model.evaluate(False)
352  print "initial score for model is %s" % initialScore
353 
354 
355  #Get non-bonded pairs in the system in preparation for restraining them
356  def getNonBondedPairs(self):
357  nonBondedDefinition = self.getParam("non_bonded_definition")
358  particlePairs = []
359  if (nonBondedDefinition == "close_pairs"):
360  particlePairs = self.getClosePairs()
361  elif (nonBondedDefinition == "random"):
362  particlePairs = self.getRandomParticlePairs()
363  elif (nonBondedDefinition == "manual"):
364  particlePairs = self.readManualRestraints(nativeProtein, 6)
365  else:
366  print "Please specify valid non bonded pair definition"
367  sys.exit()
368  return particlePairs
369 
370  #Get 3D distance between the particles in the pair
371  def getXyzDistance(self, pair):
372  d0 = IMP.core.XYZ.decorate_particle(pair[0])
373  d1 = IMP.core.XYZ.decorate_particle(pair[1])
374  distance = IMP.core.get_distance(d0, d1)
375  return distance
376 
377  #Get all pairs of particles within the distance defined by close_pair_distance parameter
378  def getClosePairs(self):
379 
380  # Get a list of all atoms in the protein, and put it in a container
381  leaves = IMP.atom.get_leaves(self.protein)
383  closePairDistance = float(self.getParam("close_pair_distance"))
384  useHardCutoff = self.getParam("use_hard_distance_cutoff") #sometimes closePairContainer returns particles > distanceCutoff, even with slack = 0
385 
386  nbl = IMP.container.ClosePairContainer(cont, closePairDistance, 0.0)
387 
388  self.model.evaluate(False)
389 
390  particlePairs = nbl.get_particle_pairs()
391  finalPairs = []
392  for pair in particlePairs:
393  distance = self.getXyzDistance(pair)
394  q0 = self.quickParticleName(pair[0])
395  q1 = self.quickParticleName(pair[1])
396 
397  if (useHardCutoff == "0" or (useHardCutoff == "1" and distance < closePairDistance)):
398  finalPairs.append([pair[0], pair[1]])
399 
400  return finalPairs
401 
402  #Add restraints for bond lengths, angles, dihedrals, and impropers
403  def addForceFieldRestraints(self):
404 
405  useForcefieldRestraints = self.getParam("use_forcefield_restraints")
406  if (useForcefieldRestraints == "1"):
407 
409  IMP.atom.get_data_path("par.lib"))
410 
411  # Set up and evaluate the stereochemical part (bonds, angles, dihedrals,
412  # impropers) of the CHARMM forcefield
413  bonds = self.topology.add_bonds(self.protein)
414  angles = ff.create_angles(bonds)
415  dihedrals = ff.create_dihedrals(bonds)
416  impropers = self.topology.add_impropers(self.protein)
417 
418  #tracks which particles are together in forcefield restraints (useful for filtering excluded volume)
419  self.ffParticleGroups = self.initializeParticleNameDict(IMP.atom.get_leaves(self.protein))
420 
421  self.createForceFieldRestraintsForModel(bonds, "bonds", "Bond Restraints", IMP.atom.BondSingletonScore(IMP.core.Harmonic(0,1)))
422  self.createForceFieldRestraintsForModel(angles, "angles", "Angle Restraints", IMP.atom.AngleSingletonScore(IMP.core.Harmonic(0,1)))
423  self.createForceFieldRestraintsForModel(dihedrals, "dihedrals", "Dihedral Restraints", IMP.atom.DihedralSingletonScore())
424  self.createForceFieldRestraintsForModel(impropers, "impropers", "Improper Restraints", IMP.atom.ImproperSingletonScore(IMP.core.Harmonic(0,1)))
425 
426  else:
427  print ("Skipping forcefield restraints")
428 
429  #Create one type of forcefield restraint. The restraint is initially across the full model and needs to be decomposed
430  #so it can be used in the restraint graph.
431  def createForceFieldRestraintsForModel(self, ffParticles, containerName, restraintName, singletonScore):
432 
433  maxForceFieldScore = float(self.getParam("max_forcefield_score"))
434  cont = IMP.container.ListSingletonContainer(self.model, containerName)
435  cont.add_particles(ffParticles)
436  listBondRestraint = IMP.container.SingletonsRestraint(singletonScore, cont, restraintName)
437  self.model.add_restraint(listBondRestraint)
438 
439  #Restraint decomposition has been a little touchy; a couple workarounds contained
440  decomposedRestraintTemp = listBondRestraint.create_decomposition()
441  decomposedRestraints = IMP.kernel.RestraintSet.get_from(decomposedRestraintTemp)
442  rs_restraints= decomposedRestraints.get_restraints()
443 
444  #manually remove the full restraint after getting the decomposition
445  self.model.remove_restraint(listBondRestraint)
446 
447  count = decomposedRestraints.get_number_of_restraints()
448  for i in range(count):
449 
450  r = decomposedRestraints.get_restraint(i)
451 
452  if (self.allFlexibleAtoms(r) == 1):
453  #add the decomposed restraint to the model
454  self.model.add_restraint(r)
455  self.model.set_maximum_score(r, maxForceFieldScore)
456  self.addParticlesToGroups(r)
457  else:
458  t = 1
459  #self.model.remove_restraint(r)
460 
461  #Add all atoms in r to ffParticleGroups; tracks which atoms are restrained by forcefield terms so
462  #we don't restrain them in other methods (e.g. excluded volume) -- essentially a manual pair-filter
463  def addParticlesToGroups(self, r):
464  inputParticles = []
465  ps = r.get_input_particles()
466 
467  for p in ps:
468 
469  if (self.isAtomParticle(p) == 0): #skip particles that aren't actually atoms (e.g. bond particles)
470  continue
471  inputParticles.append(p.get_name())
472 
473  for firstName in inputParticles:
474  for secondName in inputParticles:
475  if (firstName == secondName):
476  continue
477  self.ffParticleGroups[firstName][secondName] = 1
478 
479  #Add excluded volume restraints across close pairs. Still a work in progress as they have been slowing down
480  #the code quite a bit and we'll probably want to turn them on only at a defined time
481  def addExcludedVolumeRestraints(self):
482 
483  restrainedParticles = atomicDominoUtilities.getRestrainedParticles(self.protein, self.model, self.namesToParticles)
484  lsc = IMP.container.ListSingletonContainer(restrainedParticles)
485 
486  evr = IMP.core.ExcludedVolumeRestraint(lsc, 1, 1)
487  self.model.add_restraint(evr)
488 
489 
490  #Add lennard jones restraints between flexible atoms if they haven't been added already, and then scale radii of flexible atoms by input scaling factor
491  def addFlexFlexLjRestraints(self, scalingFactor):
492  if (self.initializedFlexFlexLjRestraints == 0):
493  print "adding initial lennard jones restraints between pairs of flexible atoms"
494  self.initializedFlexFlexLjRestraints = 1
495  leaves = IMP.atom.get_leaves(self.protein)
496  counter = 0
497 
498  sf = IMP.atom.ForceSwitch(6.0, 7.0)
500  for i in range(len(leaves)):
501  iLeaf = leaves[i]
502  for j in range(i + 1, len(leaves)):
503  jLeaf = leaves[j]
504  if (self.flexibleAtoms.has_key(self.quickParticleName(iLeaf)) == 1 and self.flexibleAtoms.has_key(self.quickParticleName(jLeaf)) == 1):
505  ljpr = IMP.core.PairRestraint(ps, [iLeaf, jLeaf], "LennardJones %s_%s" % (i, j))
506  self.model.add_restraint(ljpr)
507  chainHierarchies = IMP.atom.get_by_type(self.protein, IMP.atom.CHAIN_TYPE)
508  peptideChainHierarchy = None
509  print "scaling atomic radii by scaling factor %s" % scalingFactor
510  self.ff.add_radii(self.protein) #reset radii back to original in preparation for rescaling
511  for pName in self.flexibleAtoms.keys():
512  particle = self.namesToParticles[pName]
513  xyzr = IMP.core.XYZR.decorate_particle(particle)
514  radius = xyzr.get_radius()
515  radius *= scalingFactor
516  xyzr.set_radius(radius)
517 
518  #add lennard-jones restraints between all fixed atoms and all flexible atoms
519  def addFixedFlexibleLjRestraints(self):
520  print "adding initial fixed flexible lj restraints"
521  fixedAtoms = self.getFixedAtoms()
522  sf = IMP.atom.ForceSwitch(6.0, 7.0)
524 
525  for flexPname in self.flexibleAtoms.keys():
526  flexParticle = self.namesToParticles[flexPname]
527  for fixedPname in fixedAtoms.keys():
528  fixedParticle = self.namesToParticles[fixedPname]
529  ljpr = IMP.core.PairRestraint(ps, [fixedParticle, flexParticle], "LennardJones %s_%s" % (flexPname, fixedPname))
530  self.model.add_restraint(ljpr)
531 
532  #actually do the work to create the DOPE restraint
533  def createDopeRestraint(self, pair):
534  q0 = self.quickParticleName(pair[0])
535  q1 = self.quickParticleName(pair[1])
536  #create restraint around this distance, add to model
537  prName = "NonBonded %s_%s" % (q0, q1)
538  pairRestraint = IMP.core.PairRestraint(self.dopePairScore, pair, prName)
539 
540  #print "Added non-bonded restraint between %s and %s" % (q0, q1)
541  self.model.add_restraint(pairRestraint)
542  maxNonBondedScore = float(self.getParam("max_non_bonded_score"))
543  self.model.set_maximum_score(pairRestraint, maxNonBondedScore)
544  #initialScore = pairRestraint.evaluate(0)
545 
546 
547  #add non-bonded restraints across close pairs. At least one atom in each pair should be flexible
548  def addClosePairNonBondedRestraints(self):
549  particlePairs = self.getNonBondedPairs()
550 
551  nonBondedRestraintType = self.getParam("non_bonded_restraint_type")
552  dopeCutoff = float(self.getParam("dope_cutoff"))
553 
555 
556  ps = 0
557  if (nonBondedRestraintType == "dope"):
558  IMP.atom.add_dope_score_data(self.protein)
559  ps = IMP.atom.DopePairScore(dopeCutoff)
560  self.dopePairScore = ps
561  nonBondedCount = 0
562  self.nonBondedPairs = self.initializeParticleNameDict(IMP.atom.get_leaves(self.protein))
563  for pair in particlePairs:
564 
565  if (self.skipNonBondedPair(pair) == 1):
566  continue
567 
568  self.createDopeRestraint(pair)
569 
570  nonBondedCount += 1
571  self.nonBondedPairs[self.quickParticleName(pair[0])][self.quickParticleName(pair[0])] = 1
572  self.nonBondedPairs[self.quickParticleName(pair[1])][self.quickParticleName(pair[1])] = 1
573 
574  print "Added %s non-bonded restraints" % nonBondedCount
575  self.model.evaluate(False)
576 
577 
578  def addCompleteNonBondedRestraints(self):
579  leaves = IMP.atom.get_leaves(self.protein)
580  counter = 0
581 
582  for i in range(len(leaves)):
583  iLeaf = leaves[i]
584  for j in range(i + 1, len(leaves)):
585  jLeaf = leaves[j]
586 
587  if (self.nonBondedPairs[self.quickParticleName(iLeaf)].has_key(self.quickParticleName(jLeaf)) == 1):
588  continue
589  if (self.skipNonBondedPair([iLeaf, jLeaf]) == 1):
590  continue
591 
592  self.createDopeRestraint([iLeaf, jLeaf])
593  counter += 1
594  #print "added complete restraint between %s and %s" % (self.quickParticleName(iLeaf), self.quickParticleName(jLeaf))
595  print "added %s restraints for %s particles" % (counter, len(leaves))
596  self.model.evaluate(False)
597 
598 
599  #lots of output on all restraints
600  def writeAllRestraints(self):
601  print "write all restraints:"
602  for r in IMP.get_restraints([self.model.get_root_restraint_set()]):
603  print self.quickRestraintName(r)
604 
605 
606 
607  #Read parameterss with md temperature / step schedule and save them
608  def initializeMdSchedule(self):
609  schedule = self.getParam("md_schedule")
610  stages = schedule.split()
611  totalSteps = 0
612  for stage in stages:
613  print "unpacking stage %s" % stage
614  [steps, temperature, excludedVolume] = stage.split('_')
615  totalSteps += int(steps)
616  self.totalMdSteps = totalSteps
617  self.mdStages = stages
618 
619 
620  #Creates roothandle for hdf5 objects interacting with md trajectory
621  #Either we create a new trajectory in this file and read it back in the same run, or we are reading a saved trajectory
622  def createMdRootHandle(self):
623 
624  mdFileName = ""
625  outputDir = self.getParam("output_directory")
626 
627  if (self.getParam("read_assignments_from_file") == "no"):
628  #create new hdf5 file to which md output will be written
629  mdFileName = self.getParam("md_trajectory_output_file")
630  fileName = os.path.join(outputDir, mdFileName)
631  print "creating rmf file with filename %s" % fileName
632  rh = RMF.create_rmf_file(fileName)
633  my_kc= rh.add_category("my data");
634  IMP.rmf.add_hierarchy(rh, self.protein)
635 
636  self.rootHandle = rh
637  else:
638  #use existing hdf5 file
639  mdFileName = self.getParam("saved_md_filename")
640 
641  rh = RMF.open_rmf_file(mdFileName)
642  IMP.rmf.set_hierarchies(rh, [self.protein])
643 
644  self.rootHandle = rh
645 
646  def evaluateModel(self):
647  return self.model.evaluate(False)
648 
649  def loadMdHelpers(self):
650 
651  self.createMdRootHandle()
652 
653  self.createMdOptimizer()
654 
655  self.addMdOptimizerStates()
656 
657  self.initializeMdSchedule()
658 
659  self.createdMdOptimizer = 1
660 
661 
662  #Create the optimizer object for running MD
663  def createMdOptimizer(self):
664  print "creating md object"
665  md = IMP.atom.MolecularDynamics(self.model)
666  md.optimize(1) # hack to get around bug where vx attributes in non-optimized particles are not being set
667  md.set_velocity_cap(100.0)
668  self.mdOptimizer = md
669 
670  def addMdOptimizerStates(self):
671 
672  #VelocityScalingOptimizerState
673  vsos = IMP.atom.VelocityScalingOptimizerState(IMP.atom.get_leaves(self.protein), 0, 0) #temperature is set later
674  vsIndex = self.mdOptimizer.add_optimizer_state(vsos)
675 
676  #SaveHierarchyConfigurationOptimizerState
677  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState([self.protein], self.rootHandle)
678  hdos.set_skip_steps(0)
679  self.mdOptimizer.add_optimizer_state(hdos)
680 
681  self.hdos = hdos
682  self.vsos = vsos
683 
684  def readTrajectories(self):
685 
686  cgFileName = self.getParam("cg_output_file")
687  bestCgScore = 10000000
688  bestCgScoreFile = ""
689  bestCgRmsd = 10000000
690  bestCgRmsdFile = ""
691 
692  outputDir = self.getParam("output_directory")
693  trajectoryFile = self.getParam("md_trajectory_output_file")
694  fullFile = os.path.join(outputDir, trajectoryFile)
695  rh = RMF.open_rmf_file(fullFile)
696  IMP.rmf.set_hierarchies(rh, [self.protein])
697  framesToRead = atomicDominoUtilities.getMdIntervalFrames(rh, int(self.getParam("cg_interval")), self.protein)
698 
699  if (len(framesToRead) > 0):
700  for cgNumber in framesToRead:
701  #Open next cg trajectory
702  outputDir = self.getParam("output_directory")
703  fullCgFileName = os.path.join(outputDir, "%s%s" % (cgFileName, cgNumber))
704  rh = RMF.open_rmf_file(fullCgFileName)
705  IMP.rmf.set_hierarchies(rh, [self.protein])
706 
707  frameCount = IMP.rmf.get_number_of_frames(rh, self.protein)
708  IMP.rmf.load_frame(rh, frameCount - 1, self.protein)
709  score = self.model.evaluate(False)
710  rmsd = self.calculateNativeRmsd(self.flexibleAtoms)
711  print "cg number %s has score %s rmsd %s" % (cgNumber, score, rmsd)
712  if (score < bestCgScore):
713  bestCgScore = score
714  bestCgScoreFile = fullCgFileName
715  if (rmsd < bestCgRmsd):
716  bestCgRmsd = rmsd
717  bestCgRmsdFile = fullCgFileName
718 
719  #output best score information
720  self.singlePdbResults(bestCgScoreFile, -1, self.getParam("best_cg_score_output_file"))
721  self.singlePdbResults(bestCgRmsdFile, -1, self.getParam("best_cg_rmsd_output_file"))
722  self.singlePdbResults("%s%s" % (cgFileName, framesToRead[-1]), -1, self.getParam("final_cg_frame_output_file"))
723  finalCgRmsd = self.calculateNativeRmsd(self.flexibleAtoms)
724  self.bestCgScore = bestCgScore
725  self.bestCgRmsd = bestCgRmsd
726  self.finalCgRmsd = finalCgRmsd
727 
728 
729  def singlePdbResults(self, trajectoryFile, frame, outputPdbFile):
730 
731  fullTrajectoryFile = os.path.join(self.getParam("output_directory"), trajectoryFile)
732  fullOutputFile = os.path.join(self.getParam("output_directory"), outputPdbFile)
733  rh = RMF.open_rmf_file(fullTrajectoryFile)
734  IMP.rmf.set_hierarchies(rh, [self.protein])
735  if (frame == -1):
736  frame = IMP.rmf.get_number_of_frames(rh, self.protein) - 1
737  IMP.rmf.load_frame(rh, frame, self.protein)
738  IMP.atom.write_pdb(self.protein, fullOutputFile)
739 
740  def calculateRmsd(self, otherProtein, flexibleAtoms):
741  otherNamesToParticles = atomicDominoUtilities.makeNamesToParticles(otherProtein)
742  otherVector = []
743  modelVector = []
744  for pName in otherNamesToParticles.keys():
745  if (flexibleAtoms.has_key(pName) == 0):
746  continue
747  otherParticle = otherNamesToParticles[pName]
748  modelParticle = self.namesToParticles[pName]
749  otherVector.append(IMP.core.XYZ.decorate_particle(otherParticle).get_coordinates())
750  modelVector.append(IMP.core.XYZ.decorate_particle(modelParticle).get_coordinates())
751  rmsd = IMP.atom.get_rmsd(otherVector, modelVector)
752  return rmsd
753 
754  def calculateNativeRmsd(self, flexibleAtoms):
755 
756  if (self.wroteNativeProtein == 0):
757  pdbName = self.getParam("native_pdb_input_file")
758  self.nativeModel = IMP.kernel.Model()
759  self.nativeProtein = IMP.atom.read_pdb(pdbName, self.nativeModel, IMP.atom.ATOMPDBSelector())
760  self.wroteNativeProtein = 1
761 
762  return self.calculateRmsd(self.nativeProtein, flexibleAtoms)
763 
764 
765  #Run MD according to the temperature schedule read in from parameters
766  def runMolecularDynamics(self):
767  self.initializedFlexFlexLjRestraints = 0
768  if (self.fixedNonFlexibleAtoms == 0):
769  print "error: before running md, must fix non flexible atoms"
770  sys.exit()
771  stepsSoFar = 0
772  for stage in self.mdStages:
773  [steps, temperature, excludedVolume] = stage.split('_')
774  stepsSoFar += int(steps)
775  self.vsos.set_temperature(int(temperature))
776  if (excludedVolume == "1"): #add lennard jones restraints between flexible atoms and fixed atoms (don't add between flex atoms)
777  self.addFixedFlexibleLjRestraints()
778  elif (excludedVolume != "0"):
779  if (self.getParam("excluded_volume_type") == "lj"): #add lennard jones restraints between flexible atoms
780  self.addFlexFlexLjRestraints(float(excludedVolume))
781  elif (self.getParam("excluded_volume_type") == "ev"):
782  self.addExcludedVolumeRestraints() # add full excluded volume restraints (just take everything in the protein)
783  else:
784  "please set excluded_volume_type to either lj or ev"
785  sys.exit()
786  print "running md at temperature %s for %s steps" % (temperature, steps)
787  self.mdOptimizer.optimize(int(steps))
788 
789  IMP.atom.write_pdb(self.protein, os.path.join(self.getParam("output_directory"), "md_after_%s.pdb" % stepsSoFar))
790  print "model score after stage %s is %s" % (stage, self.model.evaluate(False))
791  print "done running md"
792 
793 
794 
795  def runAllCg(self):
796  cgInterval = int(self.getParam("cg_interval"))
797  outputDir = self.getParam("output_directory")
798  trajectoryFile = self.getParam("md_trajectory_output_file")
799  fullFile = os.path.join(outputDir, trajectoryFile)
800  print "open rmf %s" % fullFile
801  rh = RMF.open_rmf_file(fullFile)
802  IMP.rmf.set_hierarchies(rh, [self.protein])
803  framesToRead = atomicDominoUtilities.getMdIntervalFrames(rh, cgInterval, self.protein)
804 
805  for i in framesToRead:
806  cgFileName = "%s%s" % (self.getParam("cg_output_file"), i)
807  self.applyCg(i, cgFileName)
808 
809  #Apply CG to each step of the saved MD trajectory. Writes results to same file storing the MD trajectory
810  def applyCg(self, mdStepStart, cgFileName):
811 
812  cgSteps = int(self.getParam("cg_steps"))
813 
814  #load md step specified by calling object
815  print "apply cg: loading md frame for cg step start %s" % mdStepStart
816  IMP.rmf.load_frame(self.rootHandle, mdStepStart, self.protein)
817 
818  #create new hdf5 file to which cg output will be written
819  outputDir = self.getParam("output_directory")
820  fileName = os.path.join(outputDir, cgFileName)
821  print "creating cg hdf5 file %s" % fileName
822  rh = RMF.create_rmf_file(fileName)
823  my_kc= rh.add_category("my data");
824  IMP.rmf.add_hierarchy(rh, self.protein)
825 
826  #apply cg
827  cg = IMP.core.ConjugateGradients(self.model)
828  firstScore = self.model.evaluate(False)
829 
830  print "creating optimizer state"
831  hdos = IMP.rmf.SaveHierarchyConfigurationOptimizerState([self.protein], rh)
832  hdos.set_skip_steps(0)
833  cg.add_optimizer_state(hdos) #hdos is the optimizer state writing configurations to disk
834 
835  print "running cg"
836  cg.optimize(cgSteps)
837  secondScore = self.model.evaluate(False)
838  print "cg score after md step %s before %s after %s" % (mdStepStart, firstScore, secondScore)
839  return secondScore
840 
841 
842  #Tell the optimizer not to move atoms we have determined ar fixed
843  def fixNonFlexibleAtoms(self):
844  if (self.createdMdOptimizer == 0):
845  self.createMdOptimizer()
846  #print "ERROR: must create md optimizer and optimize once before set_particles_are_optimized is called"
847  #sys.exit()
848 
849  self.fixedNonFlexibleAtoms = 1
850  leaves = IMP.atom.get_leaves(self.protein)
851  for leaf in leaves:
852  if (self.flexibleAtoms.has_key(self.quickParticleName(leaf)) == 0):
853  xyzDecorator = IMP.core.XYZ.decorate_particle(leaf)
854  xyzDecorator.set_coordinates_are_optimized(0)
855 
856 
857  def writeOutput(self):
858  restraintCount = self.model.get_number_of_restraints()
859  leaves = IMP.atom.get_leaves(self.protein)
860  flexiblePeptideAtoms = 0
861  flexibleProteinAtoms = 0
862  fixedPeptideAtoms = 0
863  fixedProteinAtoms = 0
864 
865  for leaf in leaves:
866  if (self.flexibleAtoms.has_key(self.quickParticleName(leaf)) == 0):
867  if (self.getChain(leaf) == self.getParam("peptide_chain")):
868  fixedPeptideAtoms += 1
869  else:
870  fixedProteinAtoms += 1
871  else:
872  if (self.getChain(leaf) == self.getParam("peptide_chain")):
873  flexiblePeptideAtoms += 1
874  else:
875  flexibleProteinAtoms += 1
876  print "Flexible peptide atoms: %s" % flexiblePeptideAtoms
877  print "Fixed peptide atoms: %s" % fixedPeptideAtoms
878  print "Flexible protein atoms: %s" % flexibleProteinAtoms
879  print "Fixed protein atoms: %s" % fixedProteinAtoms
880  print "Total number of restraints: %s" % restraintCount
881 
882  def writeOsOutput(self):
883 
884  print "Best cg score: %s" % self.bestCgScore
885  print "best cg rmsd: %s" % self.bestCgRmsd
886  print "final cg rmsd: %s" % self.finalCgRmsd
Applies a SingletonScore to each Singleton in a list.
Chain get_chain(Hierarchy h)
void write_pdb(const Selection &mhd, base::TextOutput out, unsigned int model=1)
Score the angle based on a UnaryFunction,.
static XYZR decorate_particle(::IMP::kernel::Particle *p)
Definition: XYZR.h:47
Simple conjugate gradients optimizer.
algebra::BoundingBoxD< 3 > get_bounding_box(const Hierarchy &h)
Get a bounding box for the Hierarchy.
Hierarchy get_residue(Hierarchy mhd, unsigned int index)
Get the residue with the specified index.
Return all close unordered pairs of particles taken from the SingletonContainer.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:88
Vector3D get_random_vector_in(const Cylinder3D &c)
Generate a random vector in a cylinder with uniform density.
CHARMM force field parameters.
static Float get_k_from_standard_deviation(Float sd, Float t=297.15)
Return the k to use for a given Gaussian standard deviation.
Definition: core/Harmonic.h:63
Maintains temperature during molecular dynamics by velocity scaling.
Select all non-alternative ATOM records.
Definition: pdb.h:63
void add_hierarchy(RMF::FileHandle fh, atom::Hierarchy hs)
double get_rmsd(const Selection &s0, const Selection &s1, const IMP::algebra::Transformation3D &tr_for_second=IMP::algebra::get_identity_transformation_3d())
Definition: distance.h:47
Lennard-Jones score between a pair of particles.
Simple molecular dynamics optimizer.
static Atom decorate_particle(::IMP::kernel::Particle *p)
Definition: atom/Atom.h:241
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
algebra::BoundingBoxD< 3 > get_bounding_box(const XYZRs &ps)
Get the bounding box.
Score the improper dihedral based on a UnaryFunction,.
Score the bond based on a UnaryFunction,.
See IMP.core for more information.
See IMP.algebra for more information.
Prevent a set of particles and rigid bodies from inter-penetrating.
void add_dope_score_data(atom::Hierarchy h)
static XYZ decorate_particle(::IMP::kernel::Particle *p)
Definition: XYZ.h:51
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
compute the distance between two vectors
Definition: VectorD.h:401
Applies a PairScore to a Pair.
Definition: PairRestraint.h:30
std::string get_data_path(std::string file_name)
Return the full path to installed data.
Smooth interaction scores by switching the derivatives (force switch).
void read_pdb(base::TextInput input, int model, Hierarchy h)
Hierarchies get_leaves(const Selection &h)
See IMP.rmf for more information.
Definition: associations.h:20
See IMP.domino for more information.
Definition: analysis.h:15
void load_frame(RMF::FileConstHandle file, int frame)
Class for storing model, its restraints, constraints, and particles.
Harmonic function (symmetric about the mean)
Definition: core/Harmonic.h:25