1 from __future__
import print_function
13 from .
import atomicDominoUtilities
18 def __init__(self, model, protein, parameterFileName):
20 self.protein = protein
21 self.namesToParticles = atomicDominoUtilities.makeNamesToParticles(
23 self.readParameters(parameterFileName)
24 self.wroteNativeProtein = 0
27 def readParameters(self, parameterFileName):
28 self.parameters = atomicDominoUtilities.readParameters(
32 currentMem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
33 currentMem *= 10 ** -6
34 print(
"log memory: %s" % currentMem)
35 if (currentMem > self.maxMem):
36 self.maxMem = currentMem
39 def getParam(self, paramName):
40 paramValue = self.parameters[paramName]
43 def loadDominoHelpers(self):
44 self.subsetStateScores = {}
45 self.subsetRestraintScores = {}
46 self.subsetStateFailures = {}
47 self.nodesToAssignmentCount = {}
49 self.totalAssignments = 0
50 outputDir = self.getParam(
"output_directory")
51 mtreeCytoscapeAssignmentFile = self.getParam(
52 "mtree_cytoscape_assignment_file")
53 self.mtreeCytoscapeAssignmentFh = open(
54 os.path.join(outputDir, mtreeCytoscapeAssignmentFile),
'w')
55 self.mtreeCytoscapeAssignmentFh.write(
"Assignments\n")
57 self.mtNamesToIndices = {}
59 self.subsetNamesToAssignmentFiles = {}
65 def logTimePoint(self, reset):
68 timeDifference = newTime - self.timePoint
69 timeDifference = round(timeDifference, 0)
70 self.timePoint = newTime
74 self.timePoint = newTime
77 def createParticleStatesTable(self):
79 for particleName
in self.particleInfo.keys():
82 dataArray = self.particleInfo[particleName]
83 for i
in range(len(dataArray)):
84 [state, center] = dataArray[i]
85 statesToCenters[state] = center
88 for stateIndex
in sorted(statesToCenters.keys()):
90 statesList.append(vector3d)
94 self.dominoPst.set_particle_states(
95 self.namesToParticles[particleName],
98 def quickParticleName(self, particle):
99 return atomicDominoUtilities.quickParticleName(particle)
101 def filterAssignments(self, assignments, subset, nodeIndex, rssft):
108 restraintList.append(r)
115 sFilter = rssft.get_subset_filter(subset, filteredSubsets)
117 filteredAssignments = []
118 for assignment
in assignments:
120 if (sFilter
is None or sFilter.get_is_ok(assignment)):
123 filteredAssignments.append(assignment)
126 fraction = (passedCounter * 1.0) / (stateCounter * 1.0)
127 print(
"%s states passed out of %s total for this subset (fraction %s)"
128 % (passedCounter, stateCounter, fraction))
129 if (passedCounter == 0):
130 print(
"subset %s had 0 assignments (out of %s) pass. Exiting..."
131 % (subset, stateCounter))
134 return filteredAssignments
138 def quickSubsetName(self, subset):
139 cleanName = self.cleanVertexName(subset)
140 atomNameList = cleanName.split(
" ")
141 sortedList = sorted(atomNameList)
143 name =
" ".join(sortedList)
146 def getNodeIndexList(self):
147 return self.mt.get_vertices()
149 def getLeafNodeIndexList(self):
150 leafNodeIndexList = []
151 for subset
in self.subsets:
152 index = self.getMtIndexForSubset(self.quickSubsetName(subset))
153 leafNodeIndexList.append(index)
154 return leafNodeIndexList
157 def createSampler(self):
160 s.set_merge_tree(self.mt)
162 filterTables.append(self.rssft)
163 s.set_subset_filter_tables(filterTables)
165 s.set_assignments_table(self.lat)
167 if (self.getParam(
"cross_subset_filtering") == 1):
168 s.set_use_cross_subset_filtering(1)
175 def createSubsets(self):
176 self.initializeParticleStatesTable()
178 [self.model.get_root_restraint_set()],
180 print(
"interaction graph:")
183 print(
"junction tree:")
185 self.subsetNamesToSubsets = {}
189 for index
in mt.get_vertices():
190 subset = mt.get_vertex_name(index)
191 subsetName = self.cleanVertexName(subset)
192 self.mtNamesToIndices[subsetName] = index
195 for subset
in subsets:
196 print(
"created subset %s" % subset)
197 subsetName = self.quickSubsetName(subset)
198 self.subsetNamesToSubsets[subsetName] = subset
206 self.subsets = subsets
208 self.parentSiblingMap = {}
209 self.parentSiblingMap[self.mt.get_vertices()[-1]] = {}
210 self.createSiblingMap(self.mt.get_vertices()[-1])
212 def getMtIndexForSubset(self, subsetName):
213 for index
in self.mt.get_vertices():
214 mtNode = self.mt.get_vertex_name(index)
215 mtName = self.cleanVertexName(mtNode)
216 mtName = mtName.rstrip()
217 mtName = mtName.lstrip()
219 mtNameList = mtName.split(
" ")
220 subsetNameList = subsetName.split(
" ")
221 mtNameListSorted = sorted(mtNameList)
222 subsetNameListSorted = sorted(subsetNameList)
223 if (
" ".join(subsetNameListSorted) ==
" ".join(mtNameListSorted)):
226 print(
"did not find merge tree index for subset name %s" % subsetName)
229 def getDominoParticleNames(self):
230 particles = self.dominoPst.get_particles()
231 particleNameList = []
232 for particle
in particles:
233 pName = self.quickParticleName(particle)
234 particleNameList.append(pName)
236 return particleNameList
238 def getSubsetNameList(self):
240 for subset
in self.subsets:
241 name = self.quickSubsetName(subset)
242 subsetNameList.append(name)
243 return subsetNameList
245 def getMtIndexToParticles(self):
247 mtIndexToParticles = {}
248 allVertices = self.mt.get_vertices()
249 for nodeIndex
in allVertices:
250 subset = self.mt.get_vertex_name(nodeIndex)
251 particleList = self.quickSubsetName(subset)
252 mtIndexToParticles[nodeIndex] = particleList
253 print(
"createtMtIndexToParticles: index %s is particleList %s "
254 % (nodeIndex, particleList))
256 return mtIndexToParticles
258 def readTrajectoryFile(
271 scoreOutputFh = open(scoreOutputFile,
'w')
276 print(
"execption in loading frame %s" % i)
278 score = self.model.evaluate(
False)
285 rmsd = self.calculateNativeRmsd(flexibleAtoms)
286 scoreOutputFh.write(
"%s\t%s\t%s\n" % (i, score, rmsd))
287 if (score < bestScore):
291 if (rmsd < bestRmsd):
294 if (skipDomino == 0):
295 for atomName
in atomList:
297 particle = self.namesToParticles[atomName]
300 gridIndex = self.snapToGrid(particle)
301 center = self.grid.get_center(gridIndex)
303 for coordinate
in center:
304 pythonCenter.append(coordinate)
308 if ((gridIndex
in self.particleStatesSeen[atomName]) == 0):
313 list(self.particleStatesSeen[atomName].keys()))
314 self.particleStatesSeen[
316 gridIndex] = currentSize
317 state = self.particleStatesSeen[atomName][gridIndex]
318 self.particleInfo[atomName].append([state, pythonCenter])
320 print(
"didn't add domino states due to skip parameters")
321 return [bestScore, bestScoreFrame, bestRmsd, bestRmsdFrame]
326 def getParticleGridIndex(self, leaf):
328 coordinates = xyzDecorator.get_coordinates()
330 extendedIndex = self.grid.get_extended_index(coordinates)
331 if (self.grid.get_has_index(extendedIndex) == 0):
333 if (self.getParam(
"grid_type") ==
"sparse"):
334 self.grid.add_voxel(extendedIndex, 1)
336 self.grid.add_voxel(extendedIndex)
338 index = self.grid.get_index(extendedIndex)
343 def snapToGrid(self, particle):
345 index = self.getParticleGridIndex(particle)
346 center = self.grid.get_center(index)
348 xyzDecorator.set_coordinates(center)
353 def discretizeNativeProtein(self):
355 outputDir = self.getParam(
"output_directory")
356 nativeSnappedFile = self.getParam(
"native_protein_snapped_output_file")
360 self.snapToGrid(leaf)
369 def getPeptideCa(self):
372 residues = IMP.atom.get_by_type(self.protein, IMP.atom.RESIDUE_TYPE)
373 peptideIndicesToResidues = {}
374 for residueH
in residues:
376 chainId = chain.get_id()
378 if (chainId == self.getParam(
"peptide_chain")):
379 peptideIndicesToResidues[residue.get_index()] = residue
383 minPeptide = min(sorted(peptideIndicesToResidues.keys()))
384 maxPeptide = max(sorted(peptideIndicesToResidues.keys()))
385 centerPeptide = round(((maxPeptide - minPeptide) / 2 + minPeptide), 0)
389 centerName = atomicDominoUtilities.makeParticleName(
390 self.getParam(
"peptide_chain"),
393 centerParticle = self.namesToParticles[centerName]
394 return centerParticle
398 def createGrid(self):
400 protBb = IMP.atom.get_bounding_box(self.protein)
402 gridSpacing = float(self.getParam(
"grid_spacing"))
403 bufferSpace = float(self.getParam(
"grid_buffer_space"))
405 protBb += bufferSpace
407 if (self.getParam(
"grid_type") ==
"sparse"):
408 ca = self.getPeptideCa()
411 gridSpacing, xyzCa.get_coordinates())
419 def initializeParticleStatesTable(self):
422 restrainedParticles = atomicDominoUtilities.getRestrainedParticles(
423 self.protein, self.model, self.namesToParticles)
425 for p
in restrainedParticles:
428 dominoPst.set_particle_states(p, xyzStates)
430 self.dominoPst = dominoPst
432 def createUniqueLeafAssignments(self, particleNameList, particleInfo):
434 size = len(particleInfo[particleNameList[0]])
437 for i
in range(size):
439 for particleName
in particleNameList:
440 dataArray = particleInfo[particleName]
441 [state, center] = dataArray[i]
442 nextAssignment.append(state)
443 allAssignments.append(nextAssignment)
446 uniqueAssignments = {}
447 for assignment
in allAssignments:
450 for index
in assignment:
451 stateString = stateString + str(index) +
"_"
452 uniqueAssignments[stateString] = assignment
453 finalAssignments = []
456 for stateString
in uniqueAssignments.keys():
457 assignmentList = uniqueAssignments[stateString]
459 finalAssignments.append(assignment)
460 return finalAssignments
464 def readMdTrajectory(self, atomList, flexibleAtoms):
467 outputDir = self.getParam(
"output_directory")
468 trajectoryFile = self.getParam(
"md_trajectory_output_file")
469 fullFile = os.path.join(outputDir, trajectoryFile)
470 rh = RMF.open_rmf_file(fullFile)
471 IMP.rmf.set_hierarchies(rh, [self.protein])
472 framesToRead = atomicDominoUtilities.getMdIntervalFrames(
473 rh, int(self.getParam(
"md_interval")), self.protein)
474 print(
"preparing to read md frames %s" % framesToRead)
481 particleStatesSeen = {}
482 for atomName
in atomList:
483 particleInfo[atomName] = []
484 particleStatesSeen[atomName] = {}
486 self.particleStatesSeen = particleStatesSeen
487 self.particleInfo = particleInfo
490 mdScoreOutputFile = os.path.join(
492 self.getParam(
"md_score_output_file"))
496 bestRmsdFrame] = self.readTrajectoryFile(
506 self.singlePdbResults(
509 self.getParam(
"best_md_score_output_file"))
511 self.singlePdbResults(
514 self.getParam(
"best_md_rmsd_output_file"))
515 self.singlePdbResults(
518 self.getParam(
"final_md_frame_output_file"))
520 self.bestMdScore = round(bestMdScore, 2)
521 self.bestMdRmsd = round(bestRmsd, 2)
522 self.bestMdScoreFrame = bestScoreFrame
523 self.bestMdRmsdFrame = bestRmsdFrame
525 def readCgTrajectories(self, atomList, flexibleAtoms):
527 cgFileName = self.getParam(
"cg_output_file")
528 bestCgScore = 10000000
530 bestCgRmsd = 10000000
533 outputDir = self.getParam(
"output_directory")
534 trajectoryFile = self.getParam(
"md_trajectory_output_file")
535 fullFile = os.path.join(outputDir, trajectoryFile)
536 rh = RMF.open_rmf_file(fullFile)
537 IMP.rmf.set_hierarchies(rh, [self.protein])
538 framesToRead = atomicDominoUtilities.getMdIntervalFrames(
539 rh, int(self.getParam(
"cg_interval")), self.protein)
541 skipCgDomino = int(self.getParam(
"skip_cg_domino"))
543 if (len(framesToRead) > 0):
544 for cgNumber
in framesToRead:
546 outputDir = self.getParam(
"output_directory")
547 fullCgFileName = os.path.join(
549 (cgFileName, cgNumber))
550 rh = RMF.open_rmf_file(fullCgFileName)
551 IMP.rmf.set_hierarchies(rh, [self.protein])
554 frameCount = IMP.rmf.get_number_of_frames(rh, self.protein)
557 if (frameCount > 20):
558 startFrameCount = frameCount - 20
560 for i
in range(startFrameCount, frameCount):
564 cgScoreOutputFile = os.path.join(
566 (self.getParam(
"cg_score_output_file"), cgNumber))
567 [cgScore, cgScoreFrame, cgRmsd, cgRmsdFrame] = \
568 self.readTrajectoryFile(atomList, rh, cgFrames,
569 cgScoreOutputFile, skipCgDomino,
571 print(
"cg number %s rmsd %s score %s"
572 % (cgNumber, cgRmsd, cgScore))
574 if (cgScore < bestCgScore):
575 bestCgScore = cgScore
576 bestCgScoreFile = fullCgFileName
577 if (cgRmsd < bestCgRmsd):
579 bestCgRmsdFile = fullCgFileName
582 self.singlePdbResults(
585 self.getParam(
"best_cg_score_output_file"))
586 self.singlePdbResults(
589 self.getParam(
"best_cg_rmsd_output_file"))
590 self.singlePdbResults(
591 "%s%s" % (cgFileName, framesToRead[-1]),
592 -1, self.getParam(
"final_cg_frame_output_file"))
593 finalCgRmsd = self.calculateNativeRmsd(flexibleAtoms)
594 print(
"final cg rmsd is %s " % finalCgRmsd)
595 self.bestCgScore = round(bestCgScore, 2)
596 self.bestCgRmsd = round(bestCgRmsd, 2)
597 self.bestCgScoreFile = bestCgScoreFile
598 self.bestCgRmsdFile = bestCgRmsdFile
600 def singlePdbResults(self, trajectoryFile, frame, outputPdbFile):
602 fullTrajectoryFile = os.path.join(
603 self.getParam(
"output_directory"),
605 fullOutputFile = os.path.join(
606 self.getParam(
"output_directory"),
608 rh = RMF.open_rmf_file(fullTrajectoryFile)
609 IMP.rmf.set_hierarchies(rh, [self.protein])
611 frame = IMP.rmf.get_number_of_frames(rh, self.protein) - 1
615 def calculateRmsd(self, otherProtein, flexibleAtoms):
616 otherNamesToParticles = atomicDominoUtilities.makeNamesToParticles(
620 for pName
in otherNamesToParticles.keys():
621 if ((pName
in flexibleAtoms) == 0):
623 otherParticle = otherNamesToParticles[pName]
624 modelParticle = self.namesToParticles[pName]
625 otherVector.append(
IMP.core.XYZ(otherParticle).get_coordinates())
626 modelVector.append(
IMP.core.XYZ(modelParticle).get_coordinates())
630 def calculateNativeRmsd(self, flexibleAtoms):
632 if (self.wroteNativeProtein == 0):
633 pdbName = self.getParam(
"native_pdb_input_file")
639 self.wroteNativeProtein = 1
641 return self.calculateRmsd(self.nativeProtein, flexibleAtoms)
643 def calculateTrajectoryRmsd(
648 pdbName = self.getParam(
"native_pdb_input_file")
653 outputDir = self.getParam(
"output_directory")
654 fullFile = os.path.join(outputDir, trajectoryFile)
655 print(
"open calculate traj rmf %s" % fullFile)
656 rh = RMF.open_rmf_file(fullFile)
657 IMP.rmf.set_hierarchies(rh, [otherProtein])
658 if (trajectoryFrame == -1):
659 trajectoryFrame = IMP.rmf.get_number_of_frames(
660 rh, otherProtein) - 1
662 return self.calculateRmsd(otherProtein, flexibleAtoms)
664 def createAllSubsetAssignments(self):
668 self.model.get_root_restraint_set(),
671 leafNodeIndexList = self.getLeafNodeIndexList()
673 for nodeIndex
in leafNodeIndexList:
675 subset = self.mt.get_vertex_name(nodeIndex)
676 particleNameList = []
677 for particle
in subset:
678 particleNameList.append(self.quickParticleName(particle))
679 print(
"creating initial assignments for leaf %s" % nodeIndex)
681 assignments = self.createUniqueLeafAssignments(
682 particleNameList, self.particleInfo)
683 filteredAssignments = self.filterAssignments(
684 assignments, subset, nodeIndex, rssft)
688 for assignment
in filteredAssignments:
689 packedAssignmentContainer.add_assignment(assignment)
690 lat.set_assignments(subset, packedAssignmentContainer)
696 root = self.mt.get_vertices()[-1]
697 completeAc = self.loadAssignments(root)
698 self.completeAc = completeAc
700 def loadAssignments(self, nodeIndex):
702 children = self.mt.get_out_neighbors(nodeIndex)
703 subset = self.mt.get_vertex_name(nodeIndex)
704 heapCount = int(self.getParam(
"heap_count"))
706 heapCount, self.rssft.get_subset_filter(subset, []))
707 if len(children) == 0:
708 print(
"computing assignments for leaf %s" % nodeIndex)
710 self.sampler.load_vertex_assignments(nodeIndex, mine)
711 print(
"leaf node %s has %s leaf assignments"
712 % (nodeIndex, mine.get_number_of_assignments()))
714 if (children[0] > children[1]):
715 children = [children[1], children[0]]
717 firstAc = self.loadAssignments(children[0])
718 secondAc = self.loadAssignments(children[1])
720 self.sampler.load_vertex_assignments(
721 nodeIndex, firstAc, secondAc, mine)
723 timeDifference = self.logTimePoint(0)
724 print(
"Done Parent %s Assignments %s first child %s "
725 "second child %s time %s"
726 % (nodeIndex, mine.get_number_of_assignments(),
727 firstAc.get_number_of_assignments(),
728 secondAc.get_number_of_assignments(), timeDifference))
729 self.totalAssignments += mine.get_number_of_assignments()
733 def writeOutput(self, flexibleAtoms, startTime):
735 bestDominoScore = 100000
737 finalAssignments = self.completeAc.get_assignments()
738 for assignment
in finalAssignments:
740 self.dominoPst.get_subset(),
743 score = self.model.evaluate(
False)
744 if (score < bestDominoScore):
745 bestAssignment = assignment
746 bestDominoScore = round(score, 2)
747 print(
"best domino score is %s " % bestDominoScore)
748 print(
"best md score is %s" % self.bestMdScore)
749 print(
"best md rmsd is %s" % self.bestMdRmsd)
750 print(
"best cg score is %s" % self.bestCgScore)
751 print(
"best cg rmsd is %s" % self.bestCgRmsd)
752 print(
"merge tree contained %s total assignments"
753 % self.totalAssignments)
756 self.dominoPst.get_subset(),
759 dominoVsMdRmsd = round(
760 self.calculateTrajectoryRmsd(
762 "md_trajectory_output_file"),
763 self.bestMdScoreFrame,
770 os.path.join(self.getParam(
"output_directory"),
771 self.getParam(
"minimum_domino_score_pdb")))
773 dominoVsCgRmsd = round(
774 self.calculateTrajectoryRmsd(
775 self.bestCgScoreFile,
779 dominoMinimizedScore = round(self.model.evaluate(
False), 2)
780 dominoRmsd = round(self.calculateNativeRmsd(flexibleAtoms), 2)
782 runTime = round(time.time() - startTime, 2)
783 print(
"final domino score (after cg): %s" % dominoMinimizedScore)
784 print(
"final domino rmsd: %s" % dominoRmsd)
785 print(
"best domino rmsd with best md score: %s" % dominoVsMdRmsd)
786 print(
"domino rmsd with best cg score: %s" % dominoVsCgRmsd)
787 print(
"Final Results\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"
788 % (self.bestMdScore, self.bestMdRmsd, self.bestCgScore,
789 self.bestCgRmsd, bestDominoScore, dominoRmsd,
790 dominoMinimizedScore, dominoVsCgRmsd, self.totalAssignments,
791 self.maxMem, runTime))
796 def createSubsetFromParticles(self, particleNames):
797 particleNameList = particleNames.split(
" ")
799 for particleName
in particleNameList:
800 particle = self.namesToParticles[particleName]
801 particleList.append(particle)
804 return [subset, particleList]
806 def createHdf5AssignmentContainer(self, index, particleNames, read):
807 root = self.getAssignmentContainerRoot(index, read)
808 print(
"got root for index %s" % index)
811 dataset = root.get_child_index_data_set_2d(str(index))
813 dataset = root.add_child_index_data_set_2d(str(index))
814 print(
"added child index dataset")
816 [subset, particleOrder] = self.createSubsetFromParticles(particleNames)
817 print(
"created subset for index %s" % index)
818 hdf5Ac = IMP.domino.create_assignments_container(
819 dataset, subset, particleOrder)
820 print(
"returning from create")
822 for nextInt
in order:
823 print(
"next int is %s" % nextInt)
824 return [subset, hdf5Ac]
826 def loadAssignmentsParallel(
831 mtIndexToSubsetOrder,
835 if ((
"firstChild" in mtIndexToNodeInfo[nodeIndex]) == 0):
836 print(
"writing file for leaf index %s" % nodeIndex)
838 self.createAssignmentsParallel(
845 beginTime = self.logTimePoint(1)
846 firstChildIndex = mtIndexToNodeInfo[nodeIndex][
"firstChild"]
847 [firstSubset, firstAc] = self.createHdf5AssignmentContainer(
848 firstChildIndex, mtIndexToSubsetOrder[firstChildIndex], 1)
849 firstAcCreateTime = self.logTimePoint(0)
851 secondChildIndex = mtIndexToNodeInfo[nodeIndex][
"secondChild"]
852 [secondSubset, secondAc] = self.createHdf5AssignmentContainer(
853 secondChildIndex, mtIndexToSubsetOrder[secondChildIndex], 1)
854 secondAcCreateTime = self.logTimePoint(0)
856 print(
"getting assignments for nodeIndex %s first child %s "
858 % (nodeIndex, firstChildIndex, secondChildIndex))
859 firstChildParticles = mtIndexToSubsetOrder[firstChildIndex]
860 secondChildParticles = mtIndexToSubsetOrder[secondChildIndex]
861 myParticles = mtIndexToParticles[nodeIndex]
863 print(
"first child particles %s\nsecond child particles %s\n"
865 % (firstChildParticles, secondChildParticles, myParticles))
866 for p
in firstSubset:
867 print(
"next particle in first subset: %s"
868 % self.quickParticleName(p))
870 for p
in secondSubset:
871 print(
"next particle in second subset: %s"
872 % self.quickParticleName(p))
874 for assignment
in firstAc.get_assignments():
875 print(
"next assignment for first child %s: %s"
876 % (firstChildIndex, assignment))
878 for assignment
in secondAc.get_assignments():
879 print(
"next assignment for second child %s: %s"
880 % (secondChildIndex, assignment))
882 [mySubset, myAc] = self.createHdf5AssignmentContainer(
883 nodeIndex, mtIndexToParticles[nodeIndex], 0)
884 print(
"done creating hdf5")
885 prepTime = self.logTimePoint(0)
887 self.model.get_root_restraint_set(),
898 heapTime = self.logTimePoint(0)
900 addTime = self.logTimePoint(0)
901 for assignment
in myAc.get_assignments():
902 print(
"loadAssignmentsParallel next assignment for %s: %s"
903 % (nodeIndex, assignment))
904 doneTime = self.logTimePoint(0)
905 firstChildCount = firstAc.get_number_of_assignments()
906 secondChildCount = secondAc.get_number_of_assignments()
907 print(
"first count: %s second count: %s begin: %s firstAc: %s "
908 "secondAc: %s prep: %s heap: %s add: %s done: %s"
909 % (firstChildCount, secondChildCount, beginTime,
910 firstAcCreateTime, secondAcCreateTime, prepTime,
911 heapTime, addTime, doneTime))
912 subsetOrder = self.getSubsetOrderList(mySubset)
915 def createAssignmentsParallel(self, particleInfo, nodeIndex,
918 subsetName = mtIndexToParticles[nodeIndex]
919 print(
"starting assignments parallel leaf index %s subset name %s"
920 % (nodeIndex, subsetName))
921 [subset, particleList] = self.createSubsetFromParticles(subsetName)
923 particleNameList = subsetName.split(
" ")
926 finalAssignments = self.createUniqueLeafAssignments(
927 particleNameList, particleInfo)
930 self.model.get_root_restraint_set(),
932 filteredAssignments = self.filterAssignments(
933 finalAssignments, subset, nodeIndex, rssft)
934 root = self.getAssignmentContainerRoot(nodeIndex, 0)
935 dataset = root.add_child_index_data_set_2d(str(nodeIndex))
936 dataset.set_size([0, len(subset)])
938 hdf5AssignmentContainer = IMP.domino.create_assignments_container(
939 dataset, subset, particleOrder)
940 for assignment
in filteredAssignments:
941 hdf5AssignmentContainer.add_assignment(assignment)
942 for assignment
in hdf5AssignmentContainer.get_assignments():
943 print(
"hdf5 assignment container node %s next assignmet %s"
944 % (nodeIndex, assignment))
947 subsetOrder = self.getSubsetOrderList(subset)
948 print(
"leaf node returning with order %s" % subsetOrder)
953 def writePymolData(self):
955 outputDir = self.getParam(
"output_directory")
957 pymolInteractions = self.getParam(
"pymol_interactions_file")
963 pymolSubsets = self.getParam(
"pymol_subsets_file")
971 def cleanVertexName(self, vertexName):
975 nodeRe = re.compile(
r'Subset\(\[(.*?)\s*\]')
976 secondNodeRe = re.compile(
r'\[(.*?)\s*\]')
977 node = nodeRe.search(str(vertexName))
978 secondNode = secondNodeRe.search(str(vertexName))
982 foundName = node.group(1)
984 foundName = secondNode.group(1)
985 vertexNameFinal = foundName.replace(
'"',
'')
986 return vertexNameFinal
988 def getSubsetOrderList(self, subset):
990 for particle
in subset:
991 name = self.quickParticleName(particle)
992 subsetOrderList.append(name)
993 subsetOrder =
" ".join(subsetOrderList)
996 def checkAssignments(self, subset, nodeIndex, particleOrder):
997 print(
"reading back in assignments for leaf index %s" % nodeIndex)
998 root = self.getAssignmentContainerRoot(nodeIndex, 1)
999 dataset = root.get_child_index_data_set_2d(str(nodeIndex))
1000 hdf5 = IMP.domino.create_assignments_container(
1001 dataset, subset, particleOrder)
1002 for assignment
in hdf5.get_assignments():
1003 print(
"leaf index %s read back in %s" % (nodeIndex, assignment))
1005 def createSamplerLite(self):
1008 if (self.getParam(
"cross_subset_filtering") == 1):
1009 s.set_use_cross_subset_filtering(1)
1013 def createSiblingMap(self, parentIndex):
1015 children = self.mt.get_out_neighbors(parentIndex)
1016 if (len(children) > 0):
1017 firstChild = children[0]
1018 secondChild = children[1]
1019 self.parentSiblingMap[firstChild] = {}
1020 self.parentSiblingMap[firstChild][
"sibling"] = secondChild
1021 self.parentSiblingMap[firstChild][
"parent"] = parentIndex
1023 self.parentSiblingMap[secondChild] = {}
1024 self.parentSiblingMap[secondChild][
"sibling"] = firstChild
1025 self.parentSiblingMap[secondChild][
"parent"] = parentIndex
1026 print(
"created map for parent %s first child %s second child %s"
1027 % (parentIndex, firstChild, secondChild))
1029 self.parentSiblingMap[parentIndex][
"firstChild"] = firstChild
1030 self.parentSiblingMap[parentIndex][
"secondChild"] = secondChild
1032 self.createSiblingMap(firstChild)
1033 self.createSiblingMap(secondChild)
1035 def getMtIndexToNodeInfo(self):
1036 return self.parentSiblingMap
1038 def getLeafParentNodeIndexList(self):
1039 leafParentNodeIndexList = {}
1040 leafNodeIndexList = self.getLeafNodeIndexList()
1041 for leafIndex
in leafNodeIndexList:
1042 parent = self.parentSiblingMap[leafIndex][
"parent"]
1043 leafParentNodeIndexList[parent] = 1
1044 return leafParentNodeIndexList
1046 def getMtIndexToNameList(self):
1048 for index
in self.mt.get_vertices():
1049 name = self.mt.get_vertex_name(index)
1050 mtIndexToNames[index] = name
1051 return mtIndexToNames
1053 def getAssignmentContainerRoot(self, subsetIndex, read):
1054 outputDir = self.getParam(
"output_directory")
1055 filePrefix = self.getParam(
"subset_assignment_output_file")
1056 assignmentFileName = os.path.join(
1058 (filePrefix, subsetIndex))
1059 print(
"creating hdf5 file with name %s" % assignmentFileName)
1062 root = RMF.open_hdf5_file(assignmentFileName)
1064 root = RMF.create_hdf5_file(assignmentFileName)
1071 def writeVisualization(self):
1073 self.writeCytoscapeIgInput()
1074 self.writeCytoscapeJtInput()
1075 self.writeCytoscapeMtInput()
1076 self.writeCytoscapeScripts()
1077 self.writePymolData()
1079 def writeCytoscapeScripts(self):
1080 outputDir = self.getParam(
"output_directory")
1081 mTreeCytoscapeInput = self.getParam(
"mtree_cytoscape_input_file")
1082 mTreeCytoscapeAssignments = self.getParam(
1083 "mtree_cytoscape_assignment_file")
1084 mTreeCytoscapeAtomChains = self.getParam(
1085 "mtree_cytoscape_atom_chain_file")
1086 mTreeCytoscapeAtomSummary = self.getParam(
1087 "mtree_cytoscape_atom_summary_file")
1089 mTreeCytoscapeScript = self.getParam(
"mtree_cytoscape_script")
1090 mTreeFh = open(os.path.join(outputDir, mTreeCytoscapeScript),
'w')
1092 "network import file=%s\n" %
1093 os.path.join(outputDir, mTreeCytoscapeInput))
1095 "node import attributes file=\"%s\"\n" %
1096 os.path.join(outputDir, mTreeCytoscapeAssignments))
1098 "node import attributes file=\"%s\"\n" %
1099 os.path.join(outputDir, mTreeCytoscapeAtomSummary))
1101 "node import attributes file=\"%s\"\n" %
1102 os.path.join(outputDir, mTreeCytoscapeAtomChains))
1103 mTreeFh.write(
"layout jgraph-tree\n")
1105 def getGraphStructure(self, graph, fileName, separator):
1107 graphLogWrite = open(fileName,
'w')
1108 graph.show(graphLogWrite)
1109 graphLogWrite.close()
1112 graphLogRead = open(fileName,
'r')
1114 nodeRe = re.compile(r'^(\d+)\[label\=\"\[*(.*?)\s*\]*\"')
1115 separatorEscape =
"\" + separator
1116 edgeString =
r"^(\d+)\-%s(\d+)" % separatorEscape
1117 edgeRe = re.compile(edgeString)
1123 for line
in graphLogRead:
1126 node = nodeRe.search(line)
1128 nodeNumber = node.group(1)
1129 atomString = node.group(2)
1130 nodesToNames[nodeNumber] = atomString
1134 edge = edgeRe.search(line)
1136 firstNode = edge.group(1)
1137 secondNode = edge.group(2)
1139 if (firstNode
in nodesToNodes):
1140 firstNodeDict = nodesToNodes[firstNode]
1141 firstNodeDict[secondNode] = 1
1142 nodesToNodes[firstNode] = firstNodeDict
1144 return [nodesToNames, nodesToNodes]
1146 def writeEdgeFile(self, nodesToNodes, edgeFileName):
1148 outputDir = self.getParam(
"output_directory")
1149 graphInputFile = open(os.path.join(outputDir, edgeFileName),
'w')
1150 for firstNode
in nodesToNodes.keys():
1151 nodeDict = nodesToNodes[firstNode]
1152 for secondNode
in nodeDict.keys():
1153 graphInputFile.write(
"%s ttt %s\n" % (firstNode, secondNode))
1154 graphInputFile.close()
1156 def writeCytoscapeIgInput(self):
1157 outputDir = self.getParam(
"output_directory")
1158 igOutputFile = self.getParam(
"ig_output_file")
1159 [nodesToNames, nodesToNodes] = self.getGraphStructure(
1160 self.ig, os.path.join(outputDir, igOutputFile),
"-")
1164 self.getParam(
"ig_cytoscape_input_file"))
1167 igResiduesFile = self.getParam(
"ig_cytoscape_residues_file")
1168 peptideChain = self.getParam(
"peptide_chain")
1170 subsetResidueFile = open(os.path.join(outputDir, igResiduesFile),
'w')
1171 subsetResidueFile.write(
"ResidueNumber\n")
1172 for nodeNumber
in nodesToNames.keys():
1173 nodeName = nodesToNames[nodeNumber]
1175 [nodeChain, residueNumber,
1176 nodeAtom] = atomicDominoUtilities.getAtomInfoFromName(nodeName)
1177 if (nodeChain == peptideChain):
1178 subsetResidueFile.write(
1180 (nodeNumber, residueNumber))
1184 subsetResidueFile.write(
"%s = 100\n" % nodeNumber)
1186 def writeCytoscapeMtInput(self):
1187 outputDir = self.getParam(
"output_directory")
1188 mTreeOutputFile = self.getParam(
"mtree_output_file")
1189 [nodesToNames, nodesToNodes] = self.getGraphStructure(
1190 self.mt, os.path.join(outputDir, mTreeOutputFile),
">")
1194 self.getParam(
"mtree_cytoscape_input_file"))
1195 self.writeNodeNameAttributes(
1196 nodesToNames, self.getParam(
"mtree_cytoscape_atom_name_file"),
1197 self.getParam(
"mtree_cytoscape_atom_summary_file"),
1198 self.getParam(
"mtree_cytoscape_atom_chain_file"))
1200 def writeCytoscapeJtInput(self):
1202 outputDir = self.getParam(
"output_directory")
1203 jTreeOutputFile = self.getParam(
"jtree_output_file")
1204 [nodesToNames, nodesToNodes] = self.getGraphStructure(
1205 self.jt, os.path.join(outputDir, jTreeOutputFile),
"-")
1209 self.getParam(
"jtree_cytoscape_input_file"))
1211 self.writeNodeNameAttributes(
1212 nodesToNames, self.getParam(
"jtree_cytoscape_atom_name_file"),
1213 self.getParam(
"jtree_cytoscape_atom_summary_file"),
1214 self.getParam(
"jtree_cytoscape_atom_chain_file"))
1218 jtreeCytoscapeEdgeFile = self.getParam(
"jtree_cytoscape_edge_file")
1219 edgeWeightFile = open(
1220 os.path.join(outputDir,
1221 jtreeCytoscapeEdgeFile),
1223 edgeWeightFile.write(
"SubsetOverlap (class=Integer)\n")
1224 for firstNode
in nodesToNodes.keys():
1225 nodeDict = nodesToNodes[firstNode]
1226 for secondNode
in nodeDict.keys():
1227 firstNodeAtoms = nodesToNames[firstNode].split(
" ")
1228 secondNodeAtoms = nodesToNames[secondNode].split(
" ")
1230 val
for val
in firstNodeAtoms
if val
in secondNodeAtoms]
1231 edgeWeightFile.write(
1232 "%s (pp) %s = %s\n" %
1233 (firstNode, secondNode, len(intersection)))
1234 edgeWeightFile.close()
1236 def getAtomTypeCounts(self, atomNames):
1238 atomNames = atomNames.lstrip(
'[')
1239 atomNames = atomNames.rstrip(
']')
1240 atomNames = atomNames.rstrip()
1242 atomList = atomNames.split(
" ")
1243 peptideAtomCount = 0
1244 proteinAtomCount = 0
1245 for atom
in atomList:
1246 [chain, residue, atom] = atomicDominoUtilities.getAtomInfoFromName(
1248 if (chain == self.getParam(
"peptide_chain")):
1249 peptideAtomCount += 1
1251 proteinAtomCount += 1
1252 return [peptideAtomCount, proteinAtomCount]
1254 def writeNodeNameAttributes(
1261 outputDir = self.getParam(
"output_directory")
1262 subsetAtomNameFile = open(os.path.join(outputDir, atomNameFile),
'w')
1263 subsetAtomSummaryFile = open(
1264 os.path.join(outputDir, atomSummaryFile),
'w')
1265 subsetAtomChainFile = open(os.path.join(outputDir, atomChainFile),
'w')
1267 subsetAtomNameFile.write(
"Atom names\n")
1268 subsetAtomSummaryFile.write(
"Atom Summary\n")
1269 subsetAtomChainFile.write(
"Atom chain\n")
1270 for node
in nodesToNames.keys():
1271 atomNames = nodesToNames[node]
1272 subsetAtomNameFile.write(
"%s = %s\n" % (node, atomNames))
1273 [peptideAtomCount, proteinAtomCount] = self.getAtomTypeCounts(
1276 subsetAtomSummaryFile.write(
1278 (node, proteinAtomCount, peptideAtomCount))
1281 if (proteinAtomCount == 0):
1282 subsetAtomChainFile.write(
"%s = 1\n" % node)
1283 elif (peptideAtomCount == 0):
1284 subsetAtomChainFile.write(
"%s = 2\n" % node)
1286 subsetAtomChainFile.write(
"%s = 3\n" % node)
1288 subsetAtomChainFile.close()
1289 subsetAtomSummaryFile.close()
1290 subsetAtomNameFile.close()
InteractionGraph get_interaction_graph(ScoringFunctionAdaptor rs, const ParticlesTemp &pst)
Chain get_chain(Hierarchy h)
Get the containing chain or Chain() if there is none.
Grid3D< int, SparseGridStorage3D< int, UnboundedGridStorage3D > > SparseUnboundedIntGrid3D
SubsetGraph get_junction_tree(const InteractionGraph &ig)
Simple conjugate gradients optimizer.
Sample best solutions using Domino.
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
Subsets get_subsets(const SubsetGraph &g)
Gets all of the Subsets of a SubsetGraph.
Filter a configuration of the subset using the Model thresholds.
Represent a subset of the particles being optimized.
Grid3D< double, DenseGridStorage3D< double > > DenseDoubleGrid3D
MergeTree get_balanced_merge_tree(const SubsetGraph &junction_tree)
void read_pdb(TextInput input, int model, Hierarchy h)
display::Geometries get_subset_graph_geometry(const SubsetGraph &ig)
Class for storing model, its restraints, constraints, and particles.
Select all non-alternative ATOM records.
double get_rmsd(const Selection &s0, const Selection &s1)
ParticlesTemp get_order(const Subset &s, const SubsetFilterTables &sft)
void load_merged_assignments(const Subset &first_subset, AssignmentContainer *first, const Subset &second_subset, AssignmentContainer *second, const SubsetFilterTablesTemp &filters, AssignmentContainer *ret)
Fill in assignments for an internal node.
RestraintsTemp get_restraints(It b, It e)
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
void set_log_level(LogLevel l)
Set the current global log level.
A decorator for a residue.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Store a configuration of a subset.
Store assignments in a compact form in memory.
Write a CGO file with the geometry.
Store a set of k top scoring assignments.
display::Geometries get_interaction_graph_geometry(const InteractionGraph &ig)
void load_particle_states(const Subset &s, const Assignment &ss, const ParticleStatesTable *pst)
Load the appropriate state for each particle in a Subset.
Hierarchies get_leaves(const Selection &h)
Support for the RMF file format for storing hierarchical molecular data and markup.
Divide-and-conquer inferential optimization in discrete space.