12 from .
import atomicDominoUtilities
17 def __init__(self, model, protein, parameterFileName):
19 self.protein = protein
20 self.namesToParticles = atomicDominoUtilities.makeNamesToParticles(
22 self.readParameters(parameterFileName)
23 self.wroteNativeProtein = 0
26 def readParameters(self, parameterFileName):
27 self.parameters = atomicDominoUtilities.readParameters(
31 currentMem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
32 currentMem *= 10 ** -6
33 print(
"log memory: %s" % currentMem)
34 if (currentMem > self.maxMem):
35 self.maxMem = currentMem
38 def getParam(self, paramName):
39 paramValue = self.parameters[paramName]
42 def loadDominoHelpers(self):
43 self.subsetStateScores = {}
44 self.subsetRestraintScores = {}
45 self.subsetStateFailures = {}
46 self.nodesToAssignmentCount = {}
48 self.totalAssignments = 0
49 outputDir = self.getParam(
"output_directory")
50 mtreeCytoscapeAssignmentFile = self.getParam(
51 "mtree_cytoscape_assignment_file")
52 self.mtreeCytoscapeAssignmentFh = open(
53 os.path.join(outputDir, mtreeCytoscapeAssignmentFile),
'w')
54 self.mtreeCytoscapeAssignmentFh.write(
"Assignments\n")
56 self.mtNamesToIndices = {}
58 self.subsetNamesToAssignmentFiles = {}
64 def logTimePoint(self, reset):
67 timeDifference = newTime - self.timePoint
68 timeDifference = round(timeDifference, 0)
69 self.timePoint = newTime
73 self.timePoint = newTime
76 def createParticleStatesTable(self):
78 for particleName
in self.particleInfo.keys():
81 dataArray = self.particleInfo[particleName]
82 for i
in range(len(dataArray)):
83 [state, center] = dataArray[i]
84 statesToCenters[state] = center
87 for stateIndex
in sorted(statesToCenters.keys()):
89 statesList.append(vector3d)
93 self.dominoPst.set_particle_states(
94 self.namesToParticles[particleName],
97 def quickParticleName(self, particle):
98 return atomicDominoUtilities.quickParticleName(particle)
100 def filterAssignments(self, assignments, subset, nodeIndex, rssft):
107 restraintList.append(r)
114 sFilter = rssft.get_subset_filter(subset, filteredSubsets)
116 filteredAssignments = []
117 for assignment
in assignments:
119 if (sFilter
is None or sFilter.get_is_ok(assignment)):
122 filteredAssignments.append(assignment)
125 fraction = (passedCounter * 1.0) / (stateCounter * 1.0)
126 print(
"%s states passed out of %s total for this subset (fraction %s)"
127 % (passedCounter, stateCounter, fraction))
128 if (passedCounter == 0):
129 print(
"subset %s had 0 assignments (out of %s) pass. Exiting..."
130 % (subset, stateCounter))
133 return filteredAssignments
137 def quickSubsetName(self, subset):
138 cleanName = self.cleanVertexName(subset)
139 atomNameList = cleanName.split(
" ")
140 sortedList = sorted(atomNameList)
142 name =
" ".join(sortedList)
145 def getNodeIndexList(self):
146 return self.mt.get_vertices()
148 def getLeafNodeIndexList(self):
149 leafNodeIndexList = []
150 for subset
in self.subsets:
151 index = self.getMtIndexForSubset(self.quickSubsetName(subset))
152 leafNodeIndexList.append(index)
153 return leafNodeIndexList
156 def createSampler(self):
159 s.set_merge_tree(self.mt)
161 filterTables.append(self.rssft)
162 s.set_subset_filter_tables(filterTables)
164 s.set_assignments_table(self.lat)
166 if (self.getParam(
"cross_subset_filtering") == 1):
167 s.set_use_cross_subset_filtering(1)
174 def createSubsets(self):
175 self.initializeParticleStatesTable()
177 [self.model.get_root_restraint_set()],
179 print(
"interaction graph:")
182 print(
"junction tree:")
184 self.subsetNamesToSubsets = {}
188 for index
in mt.get_vertices():
189 subset = mt.get_vertex_name(index)
190 subsetName = self.cleanVertexName(subset)
191 self.mtNamesToIndices[subsetName] = index
194 for subset
in subsets:
195 print(
"created subset %s" % subset)
196 subsetName = self.quickSubsetName(subset)
197 self.subsetNamesToSubsets[subsetName] = subset
205 self.subsets = subsets
207 self.parentSiblingMap = {}
208 self.parentSiblingMap[self.mt.get_vertices()[-1]] = {}
209 self.createSiblingMap(self.mt.get_vertices()[-1])
211 def getMtIndexForSubset(self, subsetName):
212 for index
in self.mt.get_vertices():
213 mtNode = self.mt.get_vertex_name(index)
214 mtName = self.cleanVertexName(mtNode)
215 mtName = mtName.rstrip()
216 mtName = mtName.lstrip()
218 mtNameList = mtName.split(
" ")
219 subsetNameList = subsetName.split(
" ")
220 mtNameListSorted = sorted(mtNameList)
221 subsetNameListSorted = sorted(subsetNameList)
222 if (
" ".join(subsetNameListSorted) ==
" ".join(mtNameListSorted)):
225 print(
"did not find merge tree index for subset name %s" % subsetName)
228 def getDominoParticleNames(self):
229 particles = self.dominoPst.get_particles()
230 particleNameList = []
231 for particle
in particles:
232 pName = self.quickParticleName(particle)
233 particleNameList.append(pName)
235 return particleNameList
237 def getSubsetNameList(self):
239 for subset
in self.subsets:
240 name = self.quickSubsetName(subset)
241 subsetNameList.append(name)
242 return subsetNameList
244 def getMtIndexToParticles(self):
246 mtIndexToParticles = {}
247 allVertices = self.mt.get_vertices()
248 for nodeIndex
in allVertices:
249 subset = self.mt.get_vertex_name(nodeIndex)
250 particleList = self.quickSubsetName(subset)
251 mtIndexToParticles[nodeIndex] = particleList
252 print(
"createtMtIndexToParticles: index %s is particleList %s "
253 % (nodeIndex, particleList))
255 return mtIndexToParticles
257 def readTrajectoryFile(
270 scoreOutputFh = open(scoreOutputFile,
'w')
275 print(
"exception in loading frame %s" % i)
277 score = self.model.evaluate(
False)
284 rmsd = self.calculateNativeRmsd(flexibleAtoms)
285 scoreOutputFh.write(
"%s\t%s\t%s\n" % (i, score, rmsd))
286 if (score < bestScore):
290 if (rmsd < bestRmsd):
293 if (skipDomino == 0):
294 for atomName
in atomList:
296 particle = self.namesToParticles[atomName]
299 gridIndex = self.snapToGrid(particle)
300 center = self.grid.get_center(gridIndex)
302 for coordinate
in center:
303 pythonCenter.append(coordinate)
307 if ((gridIndex
in self.particleStatesSeen[atomName]) == 0):
312 list(self.particleStatesSeen[atomName].keys()))
313 self.particleStatesSeen[
315 gridIndex] = currentSize
316 state = self.particleStatesSeen[atomName][gridIndex]
317 self.particleInfo[atomName].append([state, pythonCenter])
319 print(
"didn't add domino states due to skip parameters")
320 return [bestScore, bestScoreFrame, bestRmsd, bestRmsdFrame]
325 def getParticleGridIndex(self, leaf):
327 coordinates = xyzDecorator.get_coordinates()
329 extendedIndex = self.grid.get_extended_index(coordinates)
330 if (self.grid.get_has_index(extendedIndex) == 0):
332 if (self.getParam(
"grid_type") ==
"sparse"):
333 self.grid.add_voxel(extendedIndex, 1)
335 self.grid.add_voxel(extendedIndex)
337 index = self.grid.get_index(extendedIndex)
342 def snapToGrid(self, particle):
344 index = self.getParticleGridIndex(particle)
345 center = self.grid.get_center(index)
347 xyzDecorator.set_coordinates(center)
352 def discretizeNativeProtein(self):
354 outputDir = self.getParam(
"output_directory")
355 nativeSnappedFile = self.getParam(
"native_protein_snapped_output_file")
359 self.snapToGrid(leaf)
368 def getPeptideCa(self):
371 residues = IMP.atom.get_by_type(self.protein, IMP.atom.RESIDUE_TYPE)
372 peptideIndicesToResidues = {}
373 for residueH
in residues:
375 chainId = chain.get_id()
377 if (chainId == self.getParam(
"peptide_chain")):
378 peptideIndicesToResidues[residue.get_index()] = residue
382 minPeptide = min(sorted(peptideIndicesToResidues.keys()))
383 maxPeptide = max(sorted(peptideIndicesToResidues.keys()))
384 centerPeptide = round(((maxPeptide - minPeptide) / 2 + minPeptide), 0)
388 centerName = atomicDominoUtilities.makeParticleName(
389 self.getParam(
"peptide_chain"),
392 centerParticle = self.namesToParticles[centerName]
393 return centerParticle
397 def createGrid(self):
399 protBb = IMP.atom.get_bounding_box(self.protein)
401 gridSpacing = float(self.getParam(
"grid_spacing"))
402 bufferSpace = float(self.getParam(
"grid_buffer_space"))
404 protBb += bufferSpace
406 if (self.getParam(
"grid_type") ==
"sparse"):
407 ca = self.getPeptideCa()
410 gridSpacing, xyzCa.get_coordinates())
418 def initializeParticleStatesTable(self):
421 restrainedParticles = atomicDominoUtilities.getRestrainedParticles(
422 self.protein, self.model, self.namesToParticles)
424 for p
in restrainedParticles:
427 dominoPst.set_particle_states(p, xyzStates)
429 self.dominoPst = dominoPst
431 def createUniqueLeafAssignments(self, particleNameList, particleInfo):
433 size = len(particleInfo[particleNameList[0]])
436 for i
in range(size):
438 for particleName
in particleNameList:
439 dataArray = particleInfo[particleName]
440 [state, center] = dataArray[i]
441 nextAssignment.append(state)
442 allAssignments.append(nextAssignment)
445 uniqueAssignments = {}
446 for assignment
in allAssignments:
449 for index
in assignment:
450 stateString = stateString + str(index) +
"_"
451 uniqueAssignments[stateString] = assignment
452 finalAssignments = []
455 for stateString
in uniqueAssignments.keys():
456 assignmentList = uniqueAssignments[stateString]
458 finalAssignments.append(assignment)
459 return finalAssignments
463 def readMdTrajectory(self, atomList, flexibleAtoms):
466 outputDir = self.getParam(
"output_directory")
467 trajectoryFile = self.getParam(
"md_trajectory_output_file")
468 fullFile = os.path.join(outputDir, trajectoryFile)
469 rh = RMF.open_rmf_file(fullFile)
470 IMP.rmf.set_hierarchies(rh, [self.protein])
471 framesToRead = atomicDominoUtilities.getMdIntervalFrames(
472 rh, int(self.getParam(
"md_interval")), self.protein)
473 print(
"preparing to read md frames %s" % framesToRead)
480 particleStatesSeen = {}
481 for atomName
in atomList:
482 particleInfo[atomName] = []
483 particleStatesSeen[atomName] = {}
485 self.particleStatesSeen = particleStatesSeen
486 self.particleInfo = particleInfo
489 mdScoreOutputFile = os.path.join(
491 self.getParam(
"md_score_output_file"))
495 bestRmsdFrame] = self.readTrajectoryFile(
505 self.singlePdbResults(
508 self.getParam(
"best_md_score_output_file"))
510 self.singlePdbResults(
513 self.getParam(
"best_md_rmsd_output_file"))
514 self.singlePdbResults(
517 self.getParam(
"final_md_frame_output_file"))
519 self.bestMdScore = round(bestMdScore, 2)
520 self.bestMdRmsd = round(bestRmsd, 2)
521 self.bestMdScoreFrame = bestScoreFrame
522 self.bestMdRmsdFrame = bestRmsdFrame
524 def readCgTrajectories(self, atomList, flexibleAtoms):
526 cgFileName = self.getParam(
"cg_output_file")
527 bestCgScore = 10000000
529 bestCgRmsd = 10000000
532 outputDir = self.getParam(
"output_directory")
533 trajectoryFile = self.getParam(
"md_trajectory_output_file")
534 fullFile = os.path.join(outputDir, trajectoryFile)
535 rh = RMF.open_rmf_file(fullFile)
536 IMP.rmf.set_hierarchies(rh, [self.protein])
537 framesToRead = atomicDominoUtilities.getMdIntervalFrames(
538 rh, int(self.getParam(
"cg_interval")), self.protein)
540 skipCgDomino = int(self.getParam(
"skip_cg_domino"))
542 if (len(framesToRead) > 0):
543 for cgNumber
in framesToRead:
545 outputDir = self.getParam(
"output_directory")
546 fullCgFileName = os.path.join(
548 (cgFileName, cgNumber))
549 rh = RMF.open_rmf_file(fullCgFileName)
550 IMP.rmf.set_hierarchies(rh, [self.protein])
553 frameCount = IMP.rmf.get_number_of_frames(rh, self.protein)
556 if (frameCount > 20):
557 startFrameCount = frameCount - 20
559 for i
in range(startFrameCount, frameCount):
563 cgScoreOutputFile = os.path.join(
565 (self.getParam(
"cg_score_output_file"), cgNumber))
566 [cgScore, cgScoreFrame, cgRmsd, cgRmsdFrame] = \
567 self.readTrajectoryFile(atomList, rh, cgFrames,
568 cgScoreOutputFile, skipCgDomino,
570 print(
"cg number %s rmsd %s score %s"
571 % (cgNumber, cgRmsd, cgScore))
573 if (cgScore < bestCgScore):
574 bestCgScore = cgScore
575 bestCgScoreFile = fullCgFileName
576 if (cgRmsd < bestCgRmsd):
578 bestCgRmsdFile = fullCgFileName
581 self.singlePdbResults(
584 self.getParam(
"best_cg_score_output_file"))
585 self.singlePdbResults(
588 self.getParam(
"best_cg_rmsd_output_file"))
589 self.singlePdbResults(
590 "%s%s" % (cgFileName, framesToRead[-1]),
591 -1, self.getParam(
"final_cg_frame_output_file"))
592 finalCgRmsd = self.calculateNativeRmsd(flexibleAtoms)
593 print(
"final cg rmsd is %s " % finalCgRmsd)
594 self.bestCgScore = round(bestCgScore, 2)
595 self.bestCgRmsd = round(bestCgRmsd, 2)
596 self.bestCgScoreFile = bestCgScoreFile
597 self.bestCgRmsdFile = bestCgRmsdFile
599 def singlePdbResults(self, trajectoryFile, frame, outputPdbFile):
601 fullTrajectoryFile = os.path.join(
602 self.getParam(
"output_directory"),
604 fullOutputFile = os.path.join(
605 self.getParam(
"output_directory"),
607 rh = RMF.open_rmf_file(fullTrajectoryFile)
608 IMP.rmf.set_hierarchies(rh, [self.protein])
610 frame = IMP.rmf.get_number_of_frames(rh, self.protein) - 1
614 def calculateRmsd(self, otherProtein, flexibleAtoms):
615 otherNamesToParticles = atomicDominoUtilities.makeNamesToParticles(
619 for pName
in otherNamesToParticles.keys():
620 if ((pName
in flexibleAtoms) == 0):
622 otherParticle = otherNamesToParticles[pName]
623 modelParticle = self.namesToParticles[pName]
624 otherVector.append(
IMP.core.XYZ(otherParticle).get_coordinates())
625 modelVector.append(
IMP.core.XYZ(modelParticle).get_coordinates())
629 def calculateNativeRmsd(self, flexibleAtoms):
631 if (self.wroteNativeProtein == 0):
632 pdbName = self.getParam(
"native_pdb_input_file")
638 self.wroteNativeProtein = 1
640 return self.calculateRmsd(self.nativeProtein, flexibleAtoms)
642 def calculateTrajectoryRmsd(
647 pdbName = self.getParam(
"native_pdb_input_file")
652 outputDir = self.getParam(
"output_directory")
653 fullFile = os.path.join(outputDir, trajectoryFile)
654 print(
"open calculate traj rmf %s" % fullFile)
655 rh = RMF.open_rmf_file(fullFile)
656 IMP.rmf.set_hierarchies(rh, [otherProtein])
657 if (trajectoryFrame == -1):
658 trajectoryFrame = IMP.rmf.get_number_of_frames(
659 rh, otherProtein) - 1
661 return self.calculateRmsd(otherProtein, flexibleAtoms)
663 def createAllSubsetAssignments(self):
667 self.model.get_root_restraint_set(),
670 leafNodeIndexList = self.getLeafNodeIndexList()
672 for nodeIndex
in leafNodeIndexList:
674 subset = self.mt.get_vertex_name(nodeIndex)
675 particleNameList = []
676 for particle
in subset:
677 particleNameList.append(self.quickParticleName(particle))
678 print(
"creating initial assignments for leaf %s" % nodeIndex)
680 assignments = self.createUniqueLeafAssignments(
681 particleNameList, self.particleInfo)
682 filteredAssignments = self.filterAssignments(
683 assignments, subset, nodeIndex, rssft)
687 for assignment
in filteredAssignments:
688 packedAssignmentContainer.add_assignment(assignment)
689 lat.set_assignments(subset, packedAssignmentContainer)
695 root = self.mt.get_vertices()[-1]
696 completeAc = self.loadAssignments(root)
697 self.completeAc = completeAc
699 def loadAssignments(self, nodeIndex):
701 children = self.mt.get_out_neighbors(nodeIndex)
702 subset = self.mt.get_vertex_name(nodeIndex)
703 heapCount = int(self.getParam(
"heap_count"))
705 heapCount, self.rssft.get_subset_filter(subset, []))
706 if len(children) == 0:
707 print(
"computing assignments for leaf %s" % nodeIndex)
709 self.sampler.load_vertex_assignments(nodeIndex, mine)
710 print(
"leaf node %s has %s leaf assignments"
711 % (nodeIndex, mine.get_number_of_assignments()))
713 if (children[0] > children[1]):
714 children = [children[1], children[0]]
716 firstAc = self.loadAssignments(children[0])
717 secondAc = self.loadAssignments(children[1])
719 self.sampler.load_vertex_assignments(
720 nodeIndex, firstAc, secondAc, mine)
722 timeDifference = self.logTimePoint(0)
723 print(
"Done Parent %s Assignments %s first child %s "
724 "second child %s time %s"
725 % (nodeIndex, mine.get_number_of_assignments(),
726 firstAc.get_number_of_assignments(),
727 secondAc.get_number_of_assignments(), timeDifference))
728 self.totalAssignments += mine.get_number_of_assignments()
732 def writeOutput(self, flexibleAtoms, startTime):
734 bestDominoScore = 100000
736 finalAssignments = self.completeAc.get_assignments()
737 for assignment
in finalAssignments:
739 self.dominoPst.get_subset(),
742 score = self.model.evaluate(
False)
743 if (score < bestDominoScore):
744 bestAssignment = assignment
745 bestDominoScore = round(score, 2)
746 print(
"best domino score is %s " % bestDominoScore)
747 print(
"best md score is %s" % self.bestMdScore)
748 print(
"best md rmsd is %s" % self.bestMdRmsd)
749 print(
"best cg score is %s" % self.bestCgScore)
750 print(
"best cg rmsd is %s" % self.bestCgRmsd)
751 print(
"merge tree contained %s total assignments"
752 % self.totalAssignments)
755 self.dominoPst.get_subset(),
758 dominoVsMdRmsd = round(
759 self.calculateTrajectoryRmsd(
761 "md_trajectory_output_file"),
762 self.bestMdScoreFrame,
769 os.path.join(self.getParam(
"output_directory"),
770 self.getParam(
"minimum_domino_score_pdb")))
772 dominoVsCgRmsd = round(
773 self.calculateTrajectoryRmsd(
774 self.bestCgScoreFile,
778 dominoMinimizedScore = round(self.model.evaluate(
False), 2)
779 dominoRmsd = round(self.calculateNativeRmsd(flexibleAtoms), 2)
781 runTime = round(time.time() - startTime, 2)
782 print(
"final domino score (after cg): %s" % dominoMinimizedScore)
783 print(
"final domino rmsd: %s" % dominoRmsd)
784 print(
"best domino rmsd with best md score: %s" % dominoVsMdRmsd)
785 print(
"domino rmsd with best cg score: %s" % dominoVsCgRmsd)
786 print(
"Final Results\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"
787 % (self.bestMdScore, self.bestMdRmsd, self.bestCgScore,
788 self.bestCgRmsd, bestDominoScore, dominoRmsd,
789 dominoMinimizedScore, dominoVsCgRmsd, self.totalAssignments,
790 self.maxMem, runTime))
795 def createSubsetFromParticles(self, particleNames):
796 particleNameList = particleNames.split(
" ")
798 for particleName
in particleNameList:
799 particle = self.namesToParticles[particleName]
800 particleList.append(particle)
803 return [subset, particleList]
805 def createHdf5AssignmentContainer(self, index, particleNames, read):
806 root = self.getAssignmentContainerRoot(index, read)
807 print(
"got root for index %s" % index)
810 dataset = root.get_child_index_data_set_2d(str(index))
812 dataset = root.add_child_index_data_set_2d(str(index))
813 print(
"added child index dataset")
815 [subset, particleOrder] = self.createSubsetFromParticles(particleNames)
816 print(
"created subset for index %s" % index)
817 hdf5Ac = IMP.domino.create_assignments_container(
818 dataset, subset, particleOrder)
819 print(
"returning from create")
821 for nextInt
in order:
822 print(
"next int is %s" % nextInt)
823 return [subset, hdf5Ac]
825 def loadAssignmentsParallel(
830 mtIndexToSubsetOrder,
834 if ((
"firstChild" in mtIndexToNodeInfo[nodeIndex]) == 0):
835 print(
"writing file for leaf index %s" % nodeIndex)
837 self.createAssignmentsParallel(
844 beginTime = self.logTimePoint(1)
845 firstChildIndex = mtIndexToNodeInfo[nodeIndex][
"firstChild"]
846 [firstSubset, firstAc] = self.createHdf5AssignmentContainer(
847 firstChildIndex, mtIndexToSubsetOrder[firstChildIndex], 1)
848 firstAcCreateTime = self.logTimePoint(0)
850 secondChildIndex = mtIndexToNodeInfo[nodeIndex][
"secondChild"]
851 [secondSubset, secondAc] = self.createHdf5AssignmentContainer(
852 secondChildIndex, mtIndexToSubsetOrder[secondChildIndex], 1)
853 secondAcCreateTime = self.logTimePoint(0)
855 print(
"getting assignments for nodeIndex %s first child %s "
857 % (nodeIndex, firstChildIndex, secondChildIndex))
858 firstChildParticles = mtIndexToSubsetOrder[firstChildIndex]
859 secondChildParticles = mtIndexToSubsetOrder[secondChildIndex]
860 myParticles = mtIndexToParticles[nodeIndex]
862 print(
"first child particles %s\nsecond child particles %s\n"
864 % (firstChildParticles, secondChildParticles, myParticles))
865 for p
in firstSubset:
866 print(
"next particle in first subset: %s"
867 % self.quickParticleName(p))
869 for p
in secondSubset:
870 print(
"next particle in second subset: %s"
871 % self.quickParticleName(p))
873 for assignment
in firstAc.get_assignments():
874 print(
"next assignment for first child %s: %s"
875 % (firstChildIndex, assignment))
877 for assignment
in secondAc.get_assignments():
878 print(
"next assignment for second child %s: %s"
879 % (secondChildIndex, assignment))
881 [mySubset, myAc] = self.createHdf5AssignmentContainer(
882 nodeIndex, mtIndexToParticles[nodeIndex], 0)
883 print(
"done creating hdf5")
884 prepTime = self.logTimePoint(0)
886 self.model.get_root_restraint_set(),
897 heapTime = self.logTimePoint(0)
899 addTime = self.logTimePoint(0)
900 for assignment
in myAc.get_assignments():
901 print(
"loadAssignmentsParallel next assignment for %s: %s"
902 % (nodeIndex, assignment))
903 doneTime = self.logTimePoint(0)
904 firstChildCount = firstAc.get_number_of_assignments()
905 secondChildCount = secondAc.get_number_of_assignments()
906 print(
"first count: %s second count: %s begin: %s firstAc: %s "
907 "secondAc: %s prep: %s heap: %s add: %s done: %s"
908 % (firstChildCount, secondChildCount, beginTime,
909 firstAcCreateTime, secondAcCreateTime, prepTime,
910 heapTime, addTime, doneTime))
911 subsetOrder = self.getSubsetOrderList(mySubset)
914 def createAssignmentsParallel(self, particleInfo, nodeIndex,
917 subsetName = mtIndexToParticles[nodeIndex]
918 print(
"starting assignments parallel leaf index %s subset name %s"
919 % (nodeIndex, subsetName))
920 [subset, particleList] = self.createSubsetFromParticles(subsetName)
922 particleNameList = subsetName.split(
" ")
925 finalAssignments = self.createUniqueLeafAssignments(
926 particleNameList, particleInfo)
929 self.model.get_root_restraint_set(),
931 filteredAssignments = self.filterAssignments(
932 finalAssignments, subset, nodeIndex, rssft)
933 root = self.getAssignmentContainerRoot(nodeIndex, 0)
934 dataset = root.add_child_index_data_set_2d(str(nodeIndex))
935 dataset.set_size([0, len(subset)])
937 hdf5AssignmentContainer = IMP.domino.create_assignments_container(
938 dataset, subset, particleOrder)
939 for assignment
in filteredAssignments:
940 hdf5AssignmentContainer.add_assignment(assignment)
941 for assignment
in hdf5AssignmentContainer.get_assignments():
942 print(
"hdf5 assignment container node %s next assignment %s"
943 % (nodeIndex, assignment))
946 subsetOrder = self.getSubsetOrderList(subset)
947 print(
"leaf node returning with order %s" % subsetOrder)
952 def writePymolData(self):
954 outputDir = self.getParam(
"output_directory")
956 pymolInteractions = self.getParam(
"pymol_interactions_file")
962 pymolSubsets = self.getParam(
"pymol_subsets_file")
970 def cleanVertexName(self, vertexName):
974 nodeRe = re.compile(
r'Subset\(\[(.*?)\s*\]')
975 secondNodeRe = re.compile(
r'\[(.*?)\s*\]')
976 node = nodeRe.search(str(vertexName))
977 secondNode = secondNodeRe.search(str(vertexName))
981 foundName = node.group(1)
983 foundName = secondNode.group(1)
984 vertexNameFinal = foundName.replace(
'"',
'')
985 return vertexNameFinal
987 def getSubsetOrderList(self, subset):
989 for particle
in subset:
990 name = self.quickParticleName(particle)
991 subsetOrderList.append(name)
992 subsetOrder =
" ".join(subsetOrderList)
995 def checkAssignments(self, subset, nodeIndex, particleOrder):
996 print(
"reading back in assignments for leaf index %s" % nodeIndex)
997 root = self.getAssignmentContainerRoot(nodeIndex, 1)
998 dataset = root.get_child_index_data_set_2d(str(nodeIndex))
999 hdf5 = IMP.domino.create_assignments_container(
1000 dataset, subset, particleOrder)
1001 for assignment
in hdf5.get_assignments():
1002 print(
"leaf index %s read back in %s" % (nodeIndex, assignment))
1004 def createSamplerLite(self):
1007 if (self.getParam(
"cross_subset_filtering") == 1):
1008 s.set_use_cross_subset_filtering(1)
1012 def createSiblingMap(self, parentIndex):
1014 children = self.mt.get_out_neighbors(parentIndex)
1015 if (len(children) > 0):
1016 firstChild = children[0]
1017 secondChild = children[1]
1018 self.parentSiblingMap[firstChild] = {}
1019 self.parentSiblingMap[firstChild][
"sibling"] = secondChild
1020 self.parentSiblingMap[firstChild][
"parent"] = parentIndex
1022 self.parentSiblingMap[secondChild] = {}
1023 self.parentSiblingMap[secondChild][
"sibling"] = firstChild
1024 self.parentSiblingMap[secondChild][
"parent"] = parentIndex
1025 print(
"created map for parent %s first child %s second child %s"
1026 % (parentIndex, firstChild, secondChild))
1028 self.parentSiblingMap[parentIndex][
"firstChild"] = firstChild
1029 self.parentSiblingMap[parentIndex][
"secondChild"] = secondChild
1031 self.createSiblingMap(firstChild)
1032 self.createSiblingMap(secondChild)
1034 def getMtIndexToNodeInfo(self):
1035 return self.parentSiblingMap
1037 def getLeafParentNodeIndexList(self):
1038 leafParentNodeIndexList = {}
1039 leafNodeIndexList = self.getLeafNodeIndexList()
1040 for leafIndex
in leafNodeIndexList:
1041 parent = self.parentSiblingMap[leafIndex][
"parent"]
1042 leafParentNodeIndexList[parent] = 1
1043 return leafParentNodeIndexList
1045 def getMtIndexToNameList(self):
1047 for index
in self.mt.get_vertices():
1048 name = self.mt.get_vertex_name(index)
1049 mtIndexToNames[index] = name
1050 return mtIndexToNames
1052 def getAssignmentContainerRoot(self, subsetIndex, read):
1053 outputDir = self.getParam(
"output_directory")
1054 filePrefix = self.getParam(
"subset_assignment_output_file")
1055 assignmentFileName = os.path.join(
1057 (filePrefix, subsetIndex))
1058 print(
"creating hdf5 file with name %s" % assignmentFileName)
1061 root = RMF.open_hdf5_file(assignmentFileName)
1063 root = RMF.create_hdf5_file(assignmentFileName)
1070 def writeVisualization(self):
1072 self.writeCytoscapeIgInput()
1073 self.writeCytoscapeJtInput()
1074 self.writeCytoscapeMtInput()
1075 self.writeCytoscapeScripts()
1076 self.writePymolData()
1078 def writeCytoscapeScripts(self):
1079 outputDir = self.getParam(
"output_directory")
1080 mTreeCytoscapeInput = self.getParam(
"mtree_cytoscape_input_file")
1081 mTreeCytoscapeAssignments = self.getParam(
1082 "mtree_cytoscape_assignment_file")
1083 mTreeCytoscapeAtomChains = self.getParam(
1084 "mtree_cytoscape_atom_chain_file")
1085 mTreeCytoscapeAtomSummary = self.getParam(
1086 "mtree_cytoscape_atom_summary_file")
1088 mTreeCytoscapeScript = self.getParam(
"mtree_cytoscape_script")
1089 mTreeFh = open(os.path.join(outputDir, mTreeCytoscapeScript),
'w')
1091 "network import file=%s\n" %
1092 os.path.join(outputDir, mTreeCytoscapeInput))
1094 "node import attributes file=\"%s\"\n" %
1095 os.path.join(outputDir, mTreeCytoscapeAssignments))
1097 "node import attributes file=\"%s\"\n" %
1098 os.path.join(outputDir, mTreeCytoscapeAtomSummary))
1100 "node import attributes file=\"%s\"\n" %
1101 os.path.join(outputDir, mTreeCytoscapeAtomChains))
1102 mTreeFh.write(
"layout jgraph-tree\n")
1104 def getGraphStructure(self, graph, fileName, separator):
1106 graphLogWrite = open(fileName,
'w')
1107 graph.show(graphLogWrite)
1108 graphLogWrite.close()
1111 graphLogRead = open(fileName,
'r')
1113 nodeRe = re.compile(r'^(\d+)\[label\=\"\[*(.*?)\s*\]*\"')
1114 separatorEscape =
"\" + separator
1115 edgeString =
r"^(\d+)\-%s(\d+)" % separatorEscape
1116 edgeRe = re.compile(edgeString)
1122 for line
in graphLogRead:
1125 node = nodeRe.search(line)
1127 nodeNumber = node.group(1)
1128 atomString = node.group(2)
1129 nodesToNames[nodeNumber] = atomString
1133 edge = edgeRe.search(line)
1135 firstNode = edge.group(1)
1136 secondNode = edge.group(2)
1138 if (firstNode
in nodesToNodes):
1139 firstNodeDict = nodesToNodes[firstNode]
1140 firstNodeDict[secondNode] = 1
1141 nodesToNodes[firstNode] = firstNodeDict
1143 return [nodesToNames, nodesToNodes]
1145 def writeEdgeFile(self, nodesToNodes, edgeFileName):
1147 outputDir = self.getParam(
"output_directory")
1148 graphInputFile = open(os.path.join(outputDir, edgeFileName),
'w')
1149 for firstNode
in nodesToNodes.keys():
1150 nodeDict = nodesToNodes[firstNode]
1151 for secondNode
in nodeDict.keys():
1152 graphInputFile.write(
"%s ttt %s\n" % (firstNode, secondNode))
1153 graphInputFile.close()
1155 def writeCytoscapeIgInput(self):
1156 outputDir = self.getParam(
"output_directory")
1157 igOutputFile = self.getParam(
"ig_output_file")
1158 [nodesToNames, nodesToNodes] = self.getGraphStructure(
1159 self.ig, os.path.join(outputDir, igOutputFile),
"-")
1163 self.getParam(
"ig_cytoscape_input_file"))
1166 igResiduesFile = self.getParam(
"ig_cytoscape_residues_file")
1167 peptideChain = self.getParam(
"peptide_chain")
1169 subsetResidueFile = open(os.path.join(outputDir, igResiduesFile),
'w')
1170 subsetResidueFile.write(
"ResidueNumber\n")
1171 for nodeNumber
in nodesToNames.keys():
1172 nodeName = nodesToNames[nodeNumber]
1174 [nodeChain, residueNumber,
1175 nodeAtom] = atomicDominoUtilities.getAtomInfoFromName(nodeName)
1176 if (nodeChain == peptideChain):
1177 subsetResidueFile.write(
1179 (nodeNumber, residueNumber))
1183 subsetResidueFile.write(
"%s = 100\n" % nodeNumber)
1185 def writeCytoscapeMtInput(self):
1186 outputDir = self.getParam(
"output_directory")
1187 mTreeOutputFile = self.getParam(
"mtree_output_file")
1188 [nodesToNames, nodesToNodes] = self.getGraphStructure(
1189 self.mt, os.path.join(outputDir, mTreeOutputFile),
">")
1193 self.getParam(
"mtree_cytoscape_input_file"))
1194 self.writeNodeNameAttributes(
1195 nodesToNames, self.getParam(
"mtree_cytoscape_atom_name_file"),
1196 self.getParam(
"mtree_cytoscape_atom_summary_file"),
1197 self.getParam(
"mtree_cytoscape_atom_chain_file"))
1199 def writeCytoscapeJtInput(self):
1201 outputDir = self.getParam(
"output_directory")
1202 jTreeOutputFile = self.getParam(
"jtree_output_file")
1203 [nodesToNames, nodesToNodes] = self.getGraphStructure(
1204 self.jt, os.path.join(outputDir, jTreeOutputFile),
"-")
1208 self.getParam(
"jtree_cytoscape_input_file"))
1210 self.writeNodeNameAttributes(
1211 nodesToNames, self.getParam(
"jtree_cytoscape_atom_name_file"),
1212 self.getParam(
"jtree_cytoscape_atom_summary_file"),
1213 self.getParam(
"jtree_cytoscape_atom_chain_file"))
1217 jtreeCytoscapeEdgeFile = self.getParam(
"jtree_cytoscape_edge_file")
1218 edgeWeightFile = open(
1219 os.path.join(outputDir,
1220 jtreeCytoscapeEdgeFile),
1222 edgeWeightFile.write(
"SubsetOverlap (class=Integer)\n")
1223 for firstNode
in nodesToNodes.keys():
1224 nodeDict = nodesToNodes[firstNode]
1225 for secondNode
in nodeDict.keys():
1226 firstNodeAtoms = nodesToNames[firstNode].split(
" ")
1227 secondNodeAtoms = nodesToNames[secondNode].split(
" ")
1229 val
for val
in firstNodeAtoms
if val
in secondNodeAtoms]
1230 edgeWeightFile.write(
1231 "%s (pp) %s = %s\n" %
1232 (firstNode, secondNode, len(intersection)))
1233 edgeWeightFile.close()
1235 def getAtomTypeCounts(self, atomNames):
1237 atomNames = atomNames.lstrip(
'[')
1238 atomNames = atomNames.rstrip(
']')
1239 atomNames = atomNames.rstrip()
1241 atomList = atomNames.split(
" ")
1242 peptideAtomCount = 0
1243 proteinAtomCount = 0
1244 for atom
in atomList:
1245 [chain, residue, atom] = atomicDominoUtilities.getAtomInfoFromName(
1247 if (chain == self.getParam(
"peptide_chain")):
1248 peptideAtomCount += 1
1250 proteinAtomCount += 1
1251 return [peptideAtomCount, proteinAtomCount]
1253 def writeNodeNameAttributes(
1260 outputDir = self.getParam(
"output_directory")
1261 subsetAtomNameFile = open(os.path.join(outputDir, atomNameFile),
'w')
1262 subsetAtomSummaryFile = open(
1263 os.path.join(outputDir, atomSummaryFile),
'w')
1264 subsetAtomChainFile = open(os.path.join(outputDir, atomChainFile),
'w')
1266 subsetAtomNameFile.write(
"Atom names\n")
1267 subsetAtomSummaryFile.write(
"Atom Summary\n")
1268 subsetAtomChainFile.write(
"Atom chain\n")
1269 for node
in nodesToNames.keys():
1270 atomNames = nodesToNames[node]
1271 subsetAtomNameFile.write(
"%s = %s\n" % (node, atomNames))
1272 [peptideAtomCount, proteinAtomCount] = self.getAtomTypeCounts(
1275 subsetAtomSummaryFile.write(
1277 (node, proteinAtomCount, peptideAtomCount))
1280 if (proteinAtomCount == 0):
1281 subsetAtomChainFile.write(
"%s = 1\n" % node)
1282 elif (peptideAtomCount == 0):
1283 subsetAtomChainFile.write(
"%s = 2\n" % node)
1285 subsetAtomChainFile.write(
"%s = 3\n" % node)
1287 subsetAtomChainFile.close()
1288 subsetAtomSummaryFile.close()
1289 subsetAtomNameFile.close()
InteractionGraph get_interaction_graph(ScoringFunctionAdaptor rs, const ParticlesTemp &pst)
Chain get_chain(Hierarchy h)
Get the containing chain or Chain() if there is none.
Grid3D< int, SparseGridStorage3D< int, UnboundedGridStorage3D > > SparseUnboundedIntGrid3D
SubsetGraph get_junction_tree(const InteractionGraph &ig)
Simple conjugate gradients optimizer.
Sample best solutions using Domino.
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
Subsets get_subsets(const SubsetGraph &g)
Gets all of the Subsets of a SubsetGraph.
Filter a configuration of the subset using the Model thresholds.
Represent a subset of the particles being optimized.
Grid3D< double, DenseGridStorage3D< double > > DenseDoubleGrid3D
MergeTree get_balanced_merge_tree(const SubsetGraph &junction_tree)
void read_pdb(TextInput input, int model, Hierarchy h)
display::Geometries get_subset_graph_geometry(const SubsetGraph &ig)
Class for storing model, its restraints, constraints, and particles.
Select all non-alternative ATOM records.
double get_rmsd(const Selection &s0, const Selection &s1)
ParticlesTemp get_order(const Subset &s, const SubsetFilterTables &sft)
void load_merged_assignments(const Subset &first_subset, AssignmentContainer *first, const Subset &second_subset, AssignmentContainer *second, const SubsetFilterTablesTemp &filters, AssignmentContainer *ret)
Fill in assignments for an internal node.
RestraintsTemp get_restraints(It b, It e)
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
void set_log_level(LogLevel l)
Set the current global log level.
A decorator for a residue.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Store a configuration of a subset.
Store assignments in a compact form in memory.
Write a CGO file with the geometry.
Store a set of k top scoring assignments.
display::Geometries get_interaction_graph_geometry(const InteractionGraph &ig)
void load_particle_states(const Subset &s, const Assignment &ss, const ParticleStatesTable *pst)
Load the appropriate state for each particle in a Subset.
Hierarchies get_leaves(const Selection &h)
Support for the RMF file format for storing hierarchical molecular data and markup.
Divide-and-conquer inferential optimization in discrete space.