1 from __future__
import print_function
17 from .
import atomicDominoUtilities
22 def __init__(self, model, protein, parameterFileName):
24 self.protein = protein
25 self.namesToParticles = atomicDominoUtilities.makeNamesToParticles(
27 self.readParameters(parameterFileName)
28 self.wroteNativeProtein = 0
31 def readParameters(self, parameterFileName):
32 self.parameters = atomicDominoUtilities.readParameters(
36 currentMem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
37 currentMem *= 10 ** -6
38 print(
"log memory: %s" % currentMem)
39 if (currentMem > self.maxMem):
40 self.maxMem = currentMem
43 def getParam(self, paramName):
44 paramValue = self.parameters[paramName]
47 def loadDominoHelpers(self):
48 self.subsetStateScores = {}
49 self.subsetRestraintScores = {}
50 self.subsetStateFailures = {}
51 self.nodesToAssignmentCount = {}
53 self.totalAssignments = 0
54 outputDir = self.getParam(
"output_directory")
55 mtreeCytoscapeAssignmentFile = self.getParam(
56 "mtree_cytoscape_assignment_file")
57 self.mtreeCytoscapeAssignmentFh = open(
58 os.path.join(outputDir, mtreeCytoscapeAssignmentFile),
'w')
59 self.mtreeCytoscapeAssignmentFh.write(
"Assignments\n")
61 self.mtNamesToIndices = {}
63 self.subsetNamesToAssignmentFiles = {}
69 def logTimePoint(self, reset):
72 timeDifference = newTime - self.timePoint
73 timeDifference = round(timeDifference, 0)
74 self.timePoint = newTime
78 self.timePoint = newTime
81 def createParticleStatesTable(self):
83 for particleName
in self.particleInfo.keys():
86 dataArray = self.particleInfo[particleName]
87 for i
in range(len(dataArray)):
88 [state, center] = dataArray[i]
89 statesToCenters[state] = center
92 for stateIndex
in sorted(statesToCenters.keys()):
94 statesList.append(vector3d)
98 self.dominoPst.set_particle_states(
99 self.namesToParticles[particleName],
102 def quickParticleName(self, particle):
103 return atomicDominoUtilities.quickParticleName(particle)
105 def filterAssignments(self, assignments, subset, nodeIndex, rssft):
112 restraintList.append(r)
120 sFilter = rssft.get_subset_filter(subset, filteredSubsets)
122 filteredAssignments = []
123 for assignment
in assignments:
125 if (sFilter
is None or sFilter.get_is_ok(assignment)):
128 filteredAssignments.append(assignment)
131 fraction = (passedCounter * 1.0) / (stateCounter * 1.0)
132 print(
"%s states passed out of %s total for this subset (fraction %s)" % (passedCounter, stateCounter, fraction))
133 if (passedCounter == 0):
134 print(
"subset %s had 0 assignments (out of %s) pass. Exiting..." % (subset, stateCounter))
137 return filteredAssignments
141 def quickSubsetName(self, subset):
142 cleanName = self.cleanVertexName(subset)
143 atomNameList = cleanName.split(
" ")
144 sortedList = sorted(atomNameList)
146 name =
" ".join(sortedList)
149 def getNodeIndexList(self):
150 return self.mt.get_vertices()
152 def getLeafNodeIndexList(self):
153 leafNodeIndexList = []
154 for subset
in self.subsets:
155 index = self.getMtIndexForSubset(self.quickSubsetName(subset))
156 leafNodeIndexList.append(index)
157 return leafNodeIndexList
160 def createSampler(self):
163 s.set_merge_tree(self.mt)
165 filterTables.append(self.rssft)
166 s.set_subset_filter_tables(filterTables)
168 s.set_assignments_table(self.lat)
170 if (self.getParam(
"cross_subset_filtering") == 1):
171 s.set_use_cross_subset_filtering(1)
177 def createSubsets(self):
178 self.initializeParticleStatesTable()
180 [self.model.get_root_restraint_set()],
182 print(
"interaction graph:")
185 print(
"junction tree:")
187 self.subsetNamesToSubsets = {}
191 for index
in mt.get_vertices():
192 subset = mt.get_vertex_name(index)
193 subsetName = self.cleanVertexName(subset)
194 self.mtNamesToIndices[subsetName] = index
197 for subset
in subsets:
198 print(
"created subset %s" % subset)
199 subsetName = self.quickSubsetName(subset)
200 self.subsetNamesToSubsets[subsetName] = subset
208 self.subsets = subsets
210 self.parentSiblingMap = {}
211 self.parentSiblingMap[self.mt.get_vertices()[-1]] = {}
212 self.createSiblingMap(self.mt.get_vertices()[-1])
214 def getMtIndexForSubset(self, subsetName):
215 for index
in self.mt.get_vertices():
216 mtNode = self.mt.get_vertex_name(index)
217 mtName = self.cleanVertexName(mtNode)
218 mtName = mtName.rstrip()
219 mtName = mtName.lstrip()
221 mtNameList = mtName.split(
" ")
222 subsetNameList = subsetName.split(
" ")
223 mtNameListSorted = sorted(mtNameList)
224 subsetNameListSorted = sorted(subsetNameList)
225 if (
" ".join(subsetNameListSorted) ==
" ".join(mtNameListSorted)):
228 print(
"did not find merge tree index for subset name %s" % subsetName)
231 def getDominoParticleNames(self):
232 particles = self.dominoPst.get_particles()
233 particleNameList = []
234 for particle
in particles:
235 pName = self.quickParticleName(particle)
236 particleNameList.append(pName)
238 return particleNameList
240 def getSubsetNameList(self):
242 for subset
in self.subsets:
243 name = self.quickSubsetName(subset)
244 subsetNameList.append(name)
245 return subsetNameList
247 def getMtIndexToParticles(self):
249 mtIndexToParticles = {}
250 allVertices = self.mt.get_vertices()
251 for nodeIndex
in allVertices:
252 subset = self.mt.get_vertex_name(nodeIndex)
253 particleList = self.quickSubsetName(subset)
254 mtIndexToParticles[nodeIndex] = particleList
255 print(
"createtMtIndexToParticles: index %s is particleList %s " % (nodeIndex, particleList))
257 return mtIndexToParticles
259 def readTrajectoryFile(
272 scoreOutputFh = open(scoreOutputFile,
'w')
277 print(
"execption in loading frame %s" % i)
279 score = self.model.evaluate(
False)
287 rmsd = self.calculateNativeRmsd(flexibleAtoms)
288 scoreOutputFh.write(
"%s\t%s\t%s\n" % (i, score, rmsd))
289 if (score < bestScore):
293 if (rmsd < bestRmsd):
296 if (skipDomino == 0):
297 for atomName
in atomList:
299 particle = self.namesToParticles[atomName]
302 gridIndex = self.snapToGrid(particle)
303 center = self.grid.get_center(gridIndex)
305 for coordinate
in center:
306 pythonCenter.append(coordinate)
310 if ((gridIndex
in self.particleStatesSeen[atomName]) == 0):
315 list(self.particleStatesSeen[atomName].keys()))
316 self.particleStatesSeen[
318 gridIndex] = currentSize
319 state = self.particleStatesSeen[atomName][gridIndex]
320 self.particleInfo[atomName].append([state, pythonCenter])
322 print(
"didn't add domino states due to skip parameters")
323 return [bestScore, bestScoreFrame, bestRmsd, bestRmsdFrame]
327 def getParticleGridIndex(self, leaf):
329 coordinates = xyzDecorator.get_coordinates()
331 extendedIndex = self.grid.get_extended_index(coordinates)
332 if (self.grid.get_has_index(extendedIndex) == 0):
334 if (self.getParam(
"grid_type") ==
"sparse"):
335 self.grid.add_voxel(extendedIndex, 1)
337 self.grid.add_voxel(extendedIndex)
339 index = self.grid.get_index(extendedIndex)
344 def snapToGrid(self, particle):
346 index = self.getParticleGridIndex(particle)
347 center = self.grid.get_center(index)
349 xyzDecorator.set_coordinates(center)
354 def discretizeNativeProtein(self):
356 outputDir = self.getParam(
"output_directory")
357 nativeSnappedFile = self.getParam(
"native_protein_snapped_output_file")
361 self.snapToGrid(leaf)
370 def getPeptideCa(self):
374 peptideIndicesToResidues = {}
375 for residueH
in residues:
377 chainId = chain.get_id()
378 residue = residueH.get_as_residue()
379 if (chainId == self.getParam(
"peptide_chain")):
380 peptideIndicesToResidues[residue.get_index()] = residue
384 minPeptide = min(sorted(peptideIndicesToResidues.keys()))
385 maxPeptide = max(sorted(peptideIndicesToResidues.keys()))
386 centerPeptide = round(((maxPeptide - minPeptide) / 2 + minPeptide), 0)
390 centerName = atomicDominoUtilities.makeParticleName(
391 self.getParam(
"peptide_chain"),
394 centerParticle = self.namesToParticles[centerName]
395 return centerParticle
399 def createGrid(self):
403 gridSpacing = float(self.getParam(
"grid_spacing"))
404 bufferSpace = float(self.getParam(
"grid_buffer_space"))
406 protBb += bufferSpace
408 if (self.getParam(
"grid_type") ==
"sparse"):
409 ca = self.getPeptideCa()
412 gridSpacing, xyzCa.get_coordinates())
420 def initializeParticleStatesTable(self):
423 restrainedParticles = atomicDominoUtilities.getRestrainedParticles(
424 self.protein, self.model, self.namesToParticles)
426 for p
in restrainedParticles:
431 dominoPst.set_particle_states(p, xyzStates)
433 self.dominoPst = dominoPst
435 def createUniqueLeafAssignments(self, particleNameList, particleInfo):
437 size = len(particleInfo[particleNameList[0]])
440 for i
in range(size):
442 for particleName
in particleNameList:
443 dataArray = particleInfo[particleName]
444 [state, center] = dataArray[i]
445 nextAssignment.append(state)
446 allAssignments.append(nextAssignment)
449 uniqueAssignments = {}
450 for assignment
in allAssignments:
453 for index
in assignment:
454 stateString = stateString + str(index) +
"_"
455 uniqueAssignments[stateString] = assignment
456 finalAssignments = []
459 for stateString
in uniqueAssignments.keys():
460 assignmentList = uniqueAssignments[stateString]
462 finalAssignments.append(assignment)
463 return finalAssignments
467 def readMdTrajectory(self, atomList, flexibleAtoms):
470 outputDir = self.getParam(
"output_directory")
471 trajectoryFile = self.getParam(
"md_trajectory_output_file")
472 fullFile = os.path.join(outputDir, trajectoryFile)
473 rh = RMF.open_rmf_file(fullFile)
474 IMP.rmf.set_hierarchies(rh, [self.protein])
475 framesToRead = atomicDominoUtilities.getMdIntervalFrames(
476 rh, int(self.getParam(
"md_interval")), self.protein)
477 print(
"preparing to read md frames %s" % framesToRead)
483 particleStatesSeen = {}
484 for atomName
in atomList:
485 particle = self.namesToParticles[atomName]
486 particleInfo[atomName] = []
487 particleStatesSeen[atomName] = {}
489 self.particleStatesSeen = particleStatesSeen
490 self.particleInfo = particleInfo
493 mdScoreOutputFile = os.path.join(
495 self.getParam(
"md_score_output_file"))
499 bestRmsdFrame] = self.readTrajectoryFile(
509 self.singlePdbResults(
512 self.getParam(
"best_md_score_output_file"))
515 self.singlePdbResults(
518 self.getParam(
"best_md_rmsd_output_file"))
519 self.singlePdbResults(
522 self.getParam(
"final_md_frame_output_file"))
524 self.bestMdScore = round(bestMdScore, 2)
525 self.bestMdRmsd = round(bestRmsd, 2)
526 self.bestMdScoreFrame = bestScoreFrame
527 self.bestMdRmsdFrame = bestRmsdFrame
529 def readCgTrajectories(self, atomList, flexibleAtoms):
531 cgFileName = self.getParam(
"cg_output_file")
532 bestCgScore = 10000000
534 bestCgRmsd = 10000000
537 outputDir = self.getParam(
"output_directory")
538 trajectoryFile = self.getParam(
"md_trajectory_output_file")
539 fullFile = os.path.join(outputDir, trajectoryFile)
540 rh = RMF.open_rmf_file(fullFile)
541 IMP.rmf.set_hierarchies(rh, [self.protein])
542 framesToRead = atomicDominoUtilities.getMdIntervalFrames(
543 rh, int(self.getParam(
"cg_interval")), self.protein)
545 skipCgDomino = int(self.getParam(
"skip_cg_domino"))
547 if (len(framesToRead) > 0):
548 for cgNumber
in framesToRead:
550 outputDir = self.getParam(
"output_directory")
551 fullCgFileName = os.path.join(
553 (cgFileName, cgNumber))
554 rh = RMF.open_rmf_file(fullCgFileName)
555 IMP.rmf.set_hierarchies(rh, [self.protein])
558 frameCount = IMP.rmf.get_number_of_frames(rh, self.protein)
561 if (frameCount > 20):
562 startFrameCount = frameCount - 20
564 for i
in range(startFrameCount, frameCount):
568 cgScoreOutputFile = os.path.join(
570 (self.getParam(
"cg_score_output_file"), cgNumber))
571 [cgScore, cgScoreFrame, cgRmsd, cgRmsdFrame] = self.readTrajectoryFile(
572 atomList, rh, cgFrames, cgScoreOutputFile, skipCgDomino, flexibleAtoms)
573 print(
"cg number %s rmsd %s score %s" % (cgNumber, cgRmsd, cgScore))
575 if (cgScore < bestCgScore):
576 bestCgScore = cgScore
577 bestCgScoreFile = fullCgFileName
578 if (cgRmsd < bestCgRmsd):
580 bestCgRmsdFile = fullCgFileName
583 self.singlePdbResults(
586 self.getParam(
"best_cg_score_output_file"))
587 self.singlePdbResults(
590 self.getParam(
"best_cg_rmsd_output_file"))
591 self.singlePdbResults(
593 (cgFileName, framesToRead[-1]), -1, self.getParam(
"final_cg_frame_output_file"))
594 finalCgRmsd = self.calculateNativeRmsd(flexibleAtoms)
595 print(
"final cg rmsd is %s " % finalCgRmsd)
596 self.bestCgScore = round(bestCgScore, 2)
597 self.bestCgRmsd = round(bestCgRmsd, 2)
598 self.bestCgScoreFile = bestCgScoreFile
599 self.bestCgRmsdFile = bestCgRmsdFile
601 def singlePdbResults(self, trajectoryFile, frame, outputPdbFile):
603 fullTrajectoryFile = os.path.join(
604 self.getParam(
"output_directory"),
606 fullOutputFile = os.path.join(
607 self.getParam(
"output_directory"),
609 rh = RMF.open_rmf_file(fullTrajectoryFile)
610 IMP.rmf.set_hierarchies(rh, [self.protein])
612 frame = IMP.rmf.get_number_of_frames(rh, self.protein) - 1
616 def calculateRmsd(self, otherProtein, flexibleAtoms):
617 otherNamesToParticles = atomicDominoUtilities.makeNamesToParticles(
621 for pName
in otherNamesToParticles.keys():
622 if ((pName
in flexibleAtoms) == 0):
624 otherParticle = otherNamesToParticles[pName]
625 modelParticle = self.namesToParticles[pName]
626 otherVector.append(
IMP.core.XYZ(otherParticle).get_coordinates())
627 modelVector.append(
IMP.core.XYZ(modelParticle).get_coordinates())
631 def calculateNativeRmsd(self, flexibleAtoms):
633 if (self.wroteNativeProtein == 0):
634 pdbName = self.getParam(
"native_pdb_input_file")
640 self.wroteNativeProtein = 1
642 return self.calculateRmsd(self.nativeProtein, flexibleAtoms)
644 def calculateTrajectoryRmsd(
649 pdbName = self.getParam(
"native_pdb_input_file")
655 outputDir = self.getParam(
"output_directory")
656 fullFile = os.path.join(outputDir, trajectoryFile)
657 print(
"open calculate traj rmf %s" % fullFile)
658 rh = RMF.open_rmf_file(fullFile)
659 IMP.rmf.set_hierarchies(rh, [otherProtein])
660 if (trajectoryFrame == -1):
661 trajectoryFrame = IMP.rmf.get_number_of_frames(
662 rh, otherProtein) - 1
664 return self.calculateRmsd(otherProtein, flexibleAtoms)
666 def createAllSubsetAssignments(self):
670 self.model.get_root_restraint_set(),
673 leafNodeIndexList = self.getLeafNodeIndexList()
675 for nodeIndex
in leafNodeIndexList:
677 subset = self.mt.get_vertex_name(nodeIndex)
678 particleNameList = []
679 for particle
in subset:
680 particleNameList.append(self.quickParticleName(particle))
681 print(
"creating initial assignments for leaf %s" % nodeIndex)
683 assignments = self.createUniqueLeafAssignments(
684 particleNameList, self.particleInfo)
685 filteredAssignments = self.filterAssignments(
686 assignments, subset, nodeIndex, rssft)
690 for assignment
in filteredAssignments:
691 packedAssignmentContainer.add_assignment(assignment)
692 lat.set_assignments(subset, packedAssignmentContainer)
698 root = self.mt.get_vertices()[-1]
699 completeAc = self.loadAssignments(root)
700 self.completeAc = completeAc
702 def loadAssignments(self, nodeIndex):
704 children = self.mt.get_out_neighbors(nodeIndex)
705 subset = self.mt.get_vertex_name(nodeIndex)
706 heapCount = int(self.getParam(
"heap_count"))
708 heapCount, self.rssft.get_subset_filter(subset, []))
709 if len(children) == 0:
710 print(
"computing assignments for leaf %s" % nodeIndex)
712 self.sampler.load_vertex_assignments(nodeIndex, mine)
713 print(
"leaf node %s has %s leaf assignments" % (nodeIndex, mine.get_number_of_assignments()))
715 if (children[0] > children[1]):
716 children = [children[1], children[0]]
718 firstAc = self.loadAssignments(children[0])
719 secondAc = self.loadAssignments(children[1])
721 self.sampler.load_vertex_assignments(
722 nodeIndex, firstAc, secondAc, mine)
724 timeDifference = self.logTimePoint(0)
725 print(
"Done Parent %s Assignments %s first child %s second child %s time %s" % (nodeIndex, mine.get_number_of_assignments(), firstAc.get_number_of_assignments(),
726 secondAc.get_number_of_assignments(), timeDifference))
727 self.totalAssignments += mine.get_number_of_assignments()
731 def writeOutput(self, flexibleAtoms, startTime):
733 bestDominoScore = 100000
735 finalAssignments = self.completeAc.get_assignments()
736 for assignment
in finalAssignments:
738 self.dominoPst.get_subset(),
741 score = self.model.evaluate(
False)
742 if (score < bestDominoScore):
743 bestAssignment = assignment
744 bestDominoScore = round(score, 2)
745 print(
"best domino score is %s " % bestDominoScore)
746 print(
"best md score is %s" % self.bestMdScore)
747 print(
"best md rmsd is %s" % self.bestMdRmsd)
748 print(
"best cg score is %s" % self.bestCgScore)
749 print(
"best cg rmsd is %s" % self.bestCgRmsd)
750 print(
"merge tree contained %s total assignments" % self.totalAssignments)
753 self.dominoPst.get_subset(),
756 dominoVsMdRmsd = round(
757 self.calculateTrajectoryRmsd(
759 "md_trajectory_output_file"),
760 self.bestMdScoreFrame,
767 os.path.join(self.getParam(
"output_directory"),
768 self.getParam(
"minimum_domino_score_pdb")))
770 dominoVsCgRmsd = round(
771 self.calculateTrajectoryRmsd(
772 self.bestCgScoreFile,
776 dominoMinimizedScore = round(self.model.evaluate(
False), 2)
777 dominoRmsd = round(self.calculateNativeRmsd(flexibleAtoms), 2)
779 runTime = round(time.time() - startTime, 2)
780 print(
"final domino score (after cg): %s" % dominoMinimizedScore)
781 print(
"final domino rmsd: %s" % dominoRmsd)
782 print(
"best domino rmsd with best md score: %s" % dominoVsMdRmsd)
783 print(
"domino rmsd with best cg score: %s" % dominoVsCgRmsd)
784 print(
"Final Results\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (self.bestMdScore, self.bestMdRmsd, self.bestCgScore, self.bestCgRmsd, bestDominoScore, dominoRmsd, dominoMinimizedScore, dominoVsCgRmsd, self.totalAssignments, self.maxMem, runTime))
789 def createSubsetFromParticles(self, particleNames):
790 particleNameList = particleNames.split(
" ")
792 for particleName
in particleNameList:
793 particle = self.namesToParticles[particleName]
794 particleList.append(particle)
797 return [subset, particleList]
799 def createHdf5AssignmentContainer(self, index, particleNames, read):
800 root = self.getAssignmentContainerRoot(index, read)
801 print(
"got root for index %s" % index)
804 dataset = root.get_child_index_data_set_2d(str(index))
806 dataset = root.add_child_index_data_set_2d(str(index))
807 print(
"added child index dataset")
809 [subset, particleOrder] = self.createSubsetFromParticles(particleNames)
810 print(
"created subset for index %s" % index)
811 hdf5Ac = IMP.domino.create_assignments_container(
812 dataset, subset, particleOrder)
813 print(
"returning from create")
815 for nextInt
in order:
816 print(
"next int is %s" % nextInt)
817 return [subset, hdf5Ac]
819 def loadAssignmentsParallel(
824 mtIndexToSubsetOrder,
828 if ((
"firstChild" in mtIndexToNodeInfo[nodeIndex]) == 0):
829 print(
"writing file for leaf index %s" % nodeIndex)
831 self.createAssignmentsParallel(
838 beginTime = self.logTimePoint(1)
839 firstChildIndex = mtIndexToNodeInfo[nodeIndex][
"firstChild"]
840 [firstSubset, firstAc] = self.createHdf5AssignmentContainer(
841 firstChildIndex, mtIndexToSubsetOrder[firstChildIndex], 1)
842 firstAcCreateTime = self.logTimePoint(0)
844 secondChildIndex = mtIndexToNodeInfo[nodeIndex][
"secondChild"]
845 [secondSubset, secondAc] = self.createHdf5AssignmentContainer(
846 secondChildIndex, mtIndexToSubsetOrder[secondChildIndex], 1)
847 secondAcCreateTime = self.logTimePoint(0)
849 print(
"getting assignments for nodeIndex %s first child %s second child %s" % (nodeIndex, firstChildIndex, secondChildIndex))
850 firstChildParticles = mtIndexToSubsetOrder[firstChildIndex]
851 secondChildParticles = mtIndexToSubsetOrder[secondChildIndex]
852 myParticles = mtIndexToParticles[nodeIndex]
854 print(
"first child particles %s\nsecond child particles %s\nmy particles; %s\n" % (firstChildParticles, secondChildParticles, myParticles))
855 for p
in firstSubset:
856 print(
"next particle in first subset: %s" % self.quickParticleName(p))
858 for p
in secondSubset:
859 print(
"next particle in second subset: %s" % self.quickParticleName(p))
861 for assignment
in firstAc.get_assignments():
862 print(
"next assignment for first child %s: %s" % (firstChildIndex, assignment))
864 for assignment
in secondAc.get_assignments():
865 print(
"next assignment for second child %s: %s" % (secondChildIndex, assignment))
867 [mySubset, myAc] = self.createHdf5AssignmentContainer(
868 nodeIndex, mtIndexToParticles[nodeIndex], 0)
869 print(
"done creating hdf5")
870 prepTime = self.logTimePoint(0)
872 self.model.get_root_restraint_set(),
874 rssf = rssft.get_subset_filter(mySubset, [])
886 heapTime = self.logTimePoint(0)
888 addTime = self.logTimePoint(0)
889 for assignment
in myAc.get_assignments():
890 print(
"loadAssignmentsParallel next assignment for %s: %s" % (nodeIndex, assignment))
891 doneTime = self.logTimePoint(0)
892 firstChildCount = firstAc.get_number_of_assignments()
893 secondChildCount = secondAc.get_number_of_assignments()
894 print(
"first count: %s second count: %s begin: %s firstAc: %s secondAc: %s prep: %s heap: %s add: %s done: %s" % (firstChildCount, secondChildCount, beginTime, firstAcCreateTime, secondAcCreateTime, prepTime, heapTime, addTime, doneTime))
895 subsetOrder = self.getSubsetOrderList(mySubset)
898 def createAssignmentsParallel(
904 subsetName = mtIndexToParticles[nodeIndex]
905 print(
"starting assignments parallel leaf index %s subset name %s" % (nodeIndex, subsetName))
906 [subset, particleList] = self.createSubsetFromParticles(subsetName)
908 particleNameList = subsetName.split(
" ")
911 finalAssignments = self.createUniqueLeafAssignments(
912 particleNameList, particleInfo)
919 self.model.get_root_restraint_set(),
921 filteredAssignments = self.filterAssignments(
922 finalAssignments, subset, nodeIndex, rssft)
923 root = self.getAssignmentContainerRoot(nodeIndex, 0)
924 dataset = root.add_child_index_data_set_2d(str(nodeIndex))
925 dataset.set_size([0, len(subset)])
927 hdf5AssignmentContainer = IMP.domino.create_assignments_container(
928 dataset, subset, particleOrder)
929 for assignment
in filteredAssignments:
930 hdf5AssignmentContainer.add_assignment(assignment)
931 for assignment
in hdf5AssignmentContainer.get_assignments():
932 print(
"hdf5 assignment container node %s next assignmet %s" % (nodeIndex, assignment))
935 subsetOrder = self.getSubsetOrderList(subset)
936 print(
"leaf node returning with order %s" % subsetOrder)
941 def writePymolData(self):
943 outputDir = self.getParam(
"output_directory")
945 pymolInteractions = self.getParam(
"pymol_interactions_file")
951 pymolSubsets = self.getParam(
"pymol_subsets_file")
959 def cleanVertexName(self, vertexName):
963 nodeRe = re.compile(
'Subset\(\[(.*?)\s*\]')
964 secondNodeRe = re.compile(
'\[(.*?)\s*\]')
965 node = nodeRe.search(str(vertexName))
966 secondNode = secondNodeRe.search(str(vertexName))
970 foundName = node.group(1)
972 foundName = secondNode.group(1)
973 vertexNameFinal = foundName.replace(
'"',
'')
974 return vertexNameFinal
976 def getSubsetOrderList(self, subset):
978 for particle
in subset:
979 name = self.quickParticleName(particle)
980 subsetOrderList.append(name)
981 subsetOrder =
" ".join(subsetOrderList)
984 def checkAssignments(self, subset, nodeIndex, particleOrder):
985 print(
"reading back in assignments for leaf index %s" % nodeIndex)
986 root = self.getAssignmentContainerRoot(nodeIndex, 1)
987 dataset = root.get_child_index_data_set_2d(str(nodeIndex))
988 hdf5 = IMP.domino.create_assignments_container(
989 dataset, subset, particleOrder)
990 for assignment
in hdf5.get_assignments():
991 print(
"leaf index %s read back in %s" % (nodeIndex, assignment))
993 def createSamplerLite(self):
996 if (self.getParam(
"cross_subset_filtering") == 1):
997 s.set_use_cross_subset_filtering(1)
1001 def createSiblingMap(self, parentIndex):
1003 children = self.mt.get_out_neighbors(parentIndex)
1004 if (len(children) > 0):
1005 firstChild = children[0]
1006 secondChild = children[1]
1007 self.parentSiblingMap[firstChild] = {}
1008 self.parentSiblingMap[firstChild][
"sibling"] = secondChild
1009 self.parentSiblingMap[firstChild][
"parent"] = parentIndex
1011 self.parentSiblingMap[secondChild] = {}
1012 self.parentSiblingMap[secondChild][
"sibling"] = firstChild
1013 self.parentSiblingMap[secondChild][
"parent"] = parentIndex
1014 print(
"created map for parent %s first child %s second child %s" % (parentIndex, firstChild, secondChild))
1016 self.parentSiblingMap[parentIndex][
"firstChild"] = firstChild
1017 self.parentSiblingMap[parentIndex][
"secondChild"] = secondChild
1019 self.createSiblingMap(firstChild)
1020 self.createSiblingMap(secondChild)
1022 def getMtIndexToNodeInfo(self):
1023 return self.parentSiblingMap
1025 def getLeafParentNodeIndexList(self):
1026 leafParentNodeIndexList = {}
1027 leafNodeIndexList = self.getLeafNodeIndexList()
1028 for leafIndex
in leafNodeIndexList:
1029 parent = self.parentSiblingMap[leafIndex][
"parent"]
1030 leafParentNodeIndexList[parent] = 1
1031 return leafParentNodeIndexList
1033 def getMtIndexToNameList(selt):
1035 for index
in self.mt.get_vertices():
1036 name = self.mt.get_vertex_name(index)
1037 mtIndexToNames[index] = name
1038 return mtIndexToNames
1040 def getAssignmentContainerRoot(self, subsetIndex, read):
1041 outputDir = self.getParam(
"output_directory")
1042 filePrefix = self.getParam(
"subset_assignment_output_file")
1043 assignmentFileName = os.path.join(
1045 (filePrefix, subsetIndex))
1046 print(
"creating hdf5 file with name %s" % assignmentFileName)
1049 root = RMF.open_hdf5_file(assignmentFileName)
1051 root = RMF.create_hdf5_file(assignmentFileName)
1058 def writeVisualization(self):
1060 self.writeCytoscapeIgInput()
1061 self.writeCytoscapeJtInput()
1062 self.writeCytoscapeMtInput()
1063 self.writeCytoscapeScripts()
1064 self.writePymolData()
1066 def writeCytoscapeScripts(self):
1067 outputDir = self.getParam(
"output_directory")
1068 mTreeCytoscapeInput = self.getParam(
"mtree_cytoscape_input_file")
1069 mTreeCytoscapeAssignments = self.getParam(
1070 "mtree_cytoscape_assignment_file")
1071 mTreeCytoscapeAtomChains = self.getParam(
1072 "mtree_cytoscape_atom_chain_file")
1073 mTreeCytoscapeAtomSummary = self.getParam(
1074 "mtree_cytoscape_atom_summary_file")
1076 mTreeCytoscapeScript = self.getParam(
"mtree_cytoscape_script")
1077 mTreeFh = open(os.path.join(outputDir, mTreeCytoscapeScript),
'w')
1079 "network import file=%s\n" %
1080 os.path.join(outputDir, mTreeCytoscapeInput))
1082 "node import attributes file=\"%s\"\n" %
1083 os.path.join(outputDir, mTreeCytoscapeAssignments))
1085 "node import attributes file=\"%s\"\n" %
1086 os.path.join(outputDir, mTreeCytoscapeAtomSummary))
1088 "node import attributes file=\"%s\"\n" %
1089 os.path.join(outputDir, mTreeCytoscapeAtomChains))
1090 mTreeFh.write(
"layout jgraph-tree\n")
1092 def getGraphStructure(self, graph, fileName, separator):
1094 graphLogWrite = open(fileName,
'w')
1095 graph.show(graphLogWrite)
1096 graphLogWrite.close()
1099 graphLogRead = open(fileName,
'r')
1101 nodeRe = re.compile('^(\d+)\[label\=\"\[*(.*?)\s*\]*\"')
1102 separatorEscape =
"\\" + separator
1103 edgeString =
"^(\d+)\-%s(\d+)" % separatorEscape
1104 edgeRe = re.compile(edgeString)
1110 for line
in graphLogRead:
1113 node = nodeRe.search(line)
1115 nodeNumber = node.group(1)
1116 atomString = node.group(2)
1117 nodesToNames[nodeNumber] = atomString
1121 edge = edgeRe.search(line)
1123 firstNode = edge.group(1)
1124 secondNode = edge.group(2)
1126 if (firstNode
in nodesToNodes):
1127 firstNodeDict = nodesToNodes[firstNode]
1128 firstNodeDict[secondNode] = 1
1129 nodesToNodes[firstNode] = firstNodeDict
1131 return [nodesToNames, nodesToNodes]
1133 def writeEdgeFile(self, nodesToNodes, edgeFileName):
1135 outputDir = self.getParam(
"output_directory")
1136 graphInputFile = open(os.path.join(outputDir, edgeFileName),
'w')
1137 for firstNode
in nodesToNodes.keys():
1138 nodeDict = nodesToNodes[firstNode]
1139 for secondNode
in nodeDict.keys():
1140 graphInputFile.write(
"%s ttt %s\n" % (firstNode, secondNode))
1141 graphInputFile.close()
1143 def writeCytoscapeIgInput(self):
1144 outputDir = self.getParam(
"output_directory")
1145 igOutputFile = self.getParam(
"ig_output_file")
1146 [nodesToNames, nodesToNodes] = self.getGraphStructure(
1147 self.ig, os.path.join(outputDir, igOutputFile),
"-")
1151 self.getParam(
"ig_cytoscape_input_file"))
1154 igResiduesFile = self.getParam(
"ig_cytoscape_residues_file")
1155 peptideChain = self.getParam(
"peptide_chain")
1157 subsetResidueFile = open(os.path.join(outputDir, igResiduesFile),
'w')
1158 subsetResidueFile.write(
"ResidueNumber\n")
1159 for nodeNumber
in nodesToNames.keys():
1160 nodeName = nodesToNames[nodeNumber]
1162 [nodeChain, residueNumber,
1163 nodeAtom] = atomicDominoUtilities.getAtomInfoFromName(nodeName)
1164 if (nodeChain == peptideChain):
1165 subsetResidueFile.write(
1167 (nodeNumber, residueNumber))
1171 subsetResidueFile.write(
"%s = 100\n" % nodeNumber)
1173 def writeCytoscapeMtInput(self):
1174 outputDir = self.getParam(
"output_directory")
1175 mTreeOutputFile = self.getParam(
"mtree_output_file")
1176 [nodesToNames, nodesToNodes] = self.getGraphStructure(
1177 self.mt, os.path.join(outputDir, mTreeOutputFile),
">")
1181 self.getParam(
"mtree_cytoscape_input_file"))
1182 self.writeNodeNameAttributes(
1183 nodesToNames, self.getParam(
"mtree_cytoscape_atom_name_file"), self.getParam(
1184 "mtree_cytoscape_atom_summary_file"),
1185 self.getParam(
"mtree_cytoscape_atom_chain_file"))
1187 def writeCytoscapeJtInput(self):
1189 outputDir = self.getParam(
"output_directory")
1190 jTreeOutputFile = self.getParam(
"jtree_output_file")
1191 [nodesToNames, nodesToNodes] = self.getGraphStructure(
1192 self.jt, os.path.join(outputDir, jTreeOutputFile),
"-")
1196 self.getParam(
"jtree_cytoscape_input_file"))
1198 self.writeNodeNameAttributes(
1199 nodesToNames, self.getParam(
"jtree_cytoscape_atom_name_file"), self.getParam(
1200 "jtree_cytoscape_atom_summary_file"),
1201 self.getParam(
"jtree_cytoscape_atom_chain_file"))
1205 jtreeCytoscapeEdgeFile = self.getParam(
"jtree_cytoscape_edge_file")
1206 edgeWeightFile = open(
1207 os.path.join(outputDir,
1208 jtreeCytoscapeEdgeFile),
1210 edgeWeightFile.write(
"SubsetOverlap (class=Integer)\n")
1211 for firstNode
in nodesToNodes.keys():
1212 nodeDict = nodesToNodes[firstNode]
1213 for secondNode
in nodeDict.keys():
1214 firstNodeAtoms = nodesToNames[firstNode].split(
" ")
1215 secondNodeAtoms = nodesToNames[secondNode].split(
" ")
1217 val
for val
in firstNodeAtoms
if val
in secondNodeAtoms]
1218 edgeWeightFile.write(
1219 "%s (pp) %s = %s\n" %
1220 (firstNode, secondNode, len(intersection)))
1221 edgeWeightFile.close()
1223 def getAtomTypeCounts(self, atomNames):
1225 atomNames = atomNames.lstrip(
'[')
1226 atomNames = atomNames.rstrip(
']')
1227 atomNames = atomNames.rstrip()
1229 atomList = atomNames.split(
" ")
1230 peptideAtomCount = 0
1231 proteinAtomCount = 0
1232 for atom
in atomList:
1233 [chain, residue, atom] = atomicDominoUtilities.getAtomInfoFromName(
1235 if (chain == self.getParam(
"peptide_chain")):
1236 peptideAtomCount += 1
1238 proteinAtomCount += 1
1239 return [peptideAtomCount, proteinAtomCount]
1241 def writeNodeNameAttributes(
1248 outputDir = self.getParam(
"output_directory")
1249 subsetAtomNameFile = open(os.path.join(outputDir, atomNameFile),
'w')
1250 subsetAtomSummaryFile = open(
1251 os.path.join(outputDir, atomSummaryFile),
'w')
1252 subsetAtomChainFile = open(os.path.join(outputDir, atomChainFile),
'w')
1254 subsetAtomNameFile.write(
"Atom names\n")
1255 subsetAtomSummaryFile.write(
"Atom Summary\n")
1256 subsetAtomChainFile.write(
"Atom chain\n")
1257 for node
in nodesToNames.keys():
1258 atomNames = nodesToNames[node]
1259 subsetAtomNameFile.write(
"%s = %s\n" % (node, atomNames))
1260 [peptideAtomCount, proteinAtomCount] = self.getAtomTypeCounts(
1263 subsetAtomSummaryFile.write(
1265 (node, proteinAtomCount, peptideAtomCount))
1268 if (proteinAtomCount == 0):
1269 subsetAtomChainFile.write(
"%s = 1\n" % node)
1270 elif (peptideAtomCount == 0):
1271 subsetAtomChainFile.write(
"%s = 2\n" % node)
1273 subsetAtomChainFile.write(
"%s = 3\n" % node)
1275 subsetAtomChainFile.close()
1276 subsetAtomSummaryFile.close()
1277 subsetAtomNameFile.close()
InteractionGraph get_interaction_graph(ScoringFunctionAdaptor rs, const ParticlesTemp &pst)
Chain get_chain(Hierarchy h)
Grid3D< int, SparseGridStorage3D< int, UnboundedGridStorage3D > > SparseUnboundedIntGrid3D
DependencyGraph get_dependency_graph(Model *m)
SubsetGraph get_junction_tree(const InteractionGraph &ig)
Simple conjugate gradients optimizer.
Sample best solutions using Domino.
algebra::BoundingBoxD< 3 > get_bounding_box(const Hierarchy &h)
Get a bounding box for the Hierarchy.
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
Subsets get_subsets(const SubsetGraph &g)
Gets all of the Subsets of a SubsetGraph.
Filter a configuration of the subset using the Model thresholds.
Represent a subset of the particles being optimized.
Grid3D< double, DenseGridStorage3D< double > > DenseDoubleGrid3D
MergeTree get_balanced_merge_tree(const SubsetGraph &junction_tree)
void read_pdb(TextInput input, int model, Hierarchy h)
display::Geometries get_subset_graph_geometry(const SubsetGraph &ig)
Class for storing model, its restraints, constraints, and particles.
Select all non-alternative ATOM records.
double get_rmsd(const Selection &s0, const Selection &s1)
ParticlesTemp get_order(const Subset &s, const SubsetFilterTables &sft)
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
Gather all the molecular particles of a certain level in the hierarchy.
void load_merged_assignments(const Subset &first_subset, AssignmentContainer *first, const Subset &second_subset, AssignmentContainer *second, const SubsetFilterTablesTemp &filters, AssignmentContainer *ret)
Fill in assignments for an internal node.
RestraintsTemp get_restraints(It b, It e)
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
A decorator for a particle with x,y,z coordinates.
void set_log_level(LogLevel l)
Set the current global log level.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Store a configuration of a subset.
Write a CGO file with the geometry.
display::Geometries get_interaction_graph_geometry(const InteractionGraph &ig)
void load_particle_states(const Subset &s, const Assignment &ss, const ParticleStatesTable *pst)
Hierarchies get_leaves(const Selection &h)
Support for the RMF file format for storing hierarchical molecular data and markup.
Divide-and-conquer inferential optimization in discrete space.