16 import atomicDominoUtilities
20 def __init__(self, model, protein, parameterFileName):
22 self.protein = protein
23 self.namesToParticles = atomicDominoUtilities.makeNamesToParticles(protein)
24 self.readParameters(parameterFileName)
25 self.wroteNativeProtein = 0
28 def readParameters(self, parameterFileName):
29 self.parameters = atomicDominoUtilities.readParameters(parameterFileName)
32 currentMem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
34 print "log memory: %s" % currentMem
35 if (currentMem > self.maxMem):
36 self.maxMem = currentMem
39 def getParam(self, paramName):
40 paramValue = self.parameters[paramName]
43 def loadDominoHelpers(self):
44 self.subsetStateScores = {}
45 self.subsetRestraintScores = {}
46 self.subsetStateFailures = {}
47 self.nodesToAssignmentCount = {}
49 self.totalAssignments = 0
50 outputDir = self.getParam(
"output_directory")
51 mtreeCytoscapeAssignmentFile = self.getParam(
"mtree_cytoscape_assignment_file")
52 self.mtreeCytoscapeAssignmentFh = open(os.path.join(outputDir, mtreeCytoscapeAssignmentFile),
'w')
53 self.mtreeCytoscapeAssignmentFh.write(
"Assignments\n")
55 self.mtNamesToIndices = {}
57 self.subsetNamesToAssignmentFiles = {}
63 def logTimePoint(self, reset):
66 timeDifference = newTime - self.timePoint
67 timeDifference = round(timeDifference, 0)
68 self.timePoint = newTime
72 self.timePoint = newTime
75 def createParticleStatesTable(self):
77 for particleName
in self.particleInfo.keys():
80 dataArray = self.particleInfo[particleName]
81 for i
in range(len(dataArray)):
82 [state, center] = dataArray[i]
83 statesToCenters[state] = center
86 for stateIndex
in sorted(statesToCenters.keys()):
88 statesList.append(vector3d)
91 self.dominoPst.set_particle_states(self.namesToParticles[particleName], xyzStates)
93 def quickParticleName(self, particle):
94 return atomicDominoUtilities.quickParticleName(particle)
96 def filterAssignments(self, assignments, subset, nodeIndex, rssft):
102 for r
in IMP.get_restraints([self.model.get_root_restraint_set()]):
103 restraintList.append(r)
104 dg = IMP.get_dependency_graph(restraintList)
111 sFilter = rssft.get_subset_filter(subset, filteredSubsets)
113 filteredAssignments = []
114 for assignment
in assignments:
116 if (sFilter ==
None or sFilter.get_is_ok(assignment)):
119 filteredAssignments.append(assignment)
122 fraction = (passedCounter * 1.0) / (stateCounter * 1.0)
123 print "%s states passed out of %s total for this subset (fraction %s)" % (passedCounter, stateCounter, fraction)
124 if (passedCounter == 0):
125 print "subset %s had 0 assignments (out of %s) pass. Exiting..." % (subset, stateCounter)
128 return filteredAssignments
131 def quickSubsetName(self, subset):
132 cleanName = self.cleanVertexName(subset)
133 atomNameList = cleanName.split(
" ")
134 sortedList = sorted(atomNameList)
136 name =
" ".join(sortedList)
139 def getNodeIndexList(self):
140 return self.mt.get_vertices()
142 def getLeafNodeIndexList(self):
143 leafNodeIndexList = []
144 for subset
in self.subsets:
145 index = self.getMtIndexForSubset(self.quickSubsetName(subset))
146 leafNodeIndexList.append(index)
147 return leafNodeIndexList
152 def createSampler(self):
155 s.set_merge_tree(self.mt)
157 filterTables.append(self.rssft)
158 s.set_subset_filter_tables(filterTables)
160 s.set_assignments_table(self.lat)
162 if (self.getParam(
"cross_subset_filtering") == 1):
163 s.set_use_cross_subset_filtering(1)
169 def createSubsets(self):
170 self.initializeParticleStatesTable()
172 print "interaction graph:"
175 print "junction tree:"
177 self.subsetNamesToSubsets = {}
182 for index
in mt.get_vertices():
183 subset = mt.get_vertex_name(index)
184 subsetName = self.cleanVertexName(subset)
185 self.mtNamesToIndices[subsetName] = index
188 for subset
in subsets:
189 print "created subset %s" % subset
190 subsetName = self.quickSubsetName(subset)
191 self.subsetNamesToSubsets[subsetName] = subset
200 self.subsets = subsets
202 self.parentSiblingMap = {}
203 self.parentSiblingMap[self.mt.get_vertices()[-1]] = {}
204 self.createSiblingMap(self.mt.get_vertices()[-1])
207 def getMtIndexForSubset(self, subsetName):
208 for index
in self.mt.get_vertices():
209 mtNode = self.mt.get_vertex_name(index)
210 mtName = self.cleanVertexName(mtNode)
211 mtName = mtName.rstrip()
212 mtName = mtName.lstrip()
214 mtNameList = mtName.split(
" ")
215 subsetNameList = subsetName.split(
" ")
216 mtNameListSorted = sorted(mtNameList)
217 subsetNameListSorted = sorted(subsetNameList)
218 if (
" ".join(subsetNameListSorted) ==
" ".join(mtNameListSorted)):
221 print "did not find merge tree index for subset name %s" % subsetName
225 def getDominoParticleNames(self):
226 particles = self.dominoPst.get_particles()
227 particleNameList = []
228 for particle
in particles:
229 pName = self.quickParticleName(particle)
230 particleNameList.append(pName)
232 return particleNameList
234 def getSubsetNameList(self):
236 for subset
in self.subsets:
237 name = self.quickSubsetName(subset)
238 subsetNameList.append(name)
239 return subsetNameList
241 def getMtIndexToParticles(self):
243 mtIndexToParticles = {}
244 allVertices = self.mt.get_vertices()
245 for nodeIndex
in allVertices:
246 subset = self.mt.get_vertex_name(nodeIndex)
247 particleList = self.quickSubsetName(subset)
248 mtIndexToParticles[nodeIndex] = particleList
249 print "createtMtIndexToParticles: index %s is particleList %s " % (nodeIndex, particleList)
251 return mtIndexToParticles
254 def readTrajectoryFile(self, atomList, rh, frames, scoreOutputFile, skipDomino, flexibleAtoms):
260 scoreOutputFh = open(scoreOutputFile,
'w')
265 print "execption in loading frame %s" % i
267 score = self.model.evaluate(
False)
273 rmsd = self.calculateNativeRmsd(flexibleAtoms)
274 scoreOutputFh.write(
"%s\t%s\t%s\n" % (i, score, rmsd))
275 if (score < bestScore):
279 if (rmsd < bestRmsd):
282 if (skipDomino == 0):
283 for atomName
in atomList:
285 particle = self.namesToParticles[atomName]
288 gridIndex = self.snapToGrid(particle)
289 center = self.grid.get_center(gridIndex)
291 for coordinate
in center:
292 pythonCenter.append(coordinate)
295 if (self.particleStatesSeen[atomName].has_key(gridIndex) == 0):
297 currentSize = len(self.particleStatesSeen[atomName].keys())
298 self.particleStatesSeen[atomName][gridIndex] = currentSize
299 state = self.particleStatesSeen[atomName][gridIndex]
300 self.particleInfo[atomName].append([state, pythonCenter])
302 print "didn't add domino states due to skip parameters"
303 return [bestScore, bestScoreFrame, bestRmsd, bestRmsdFrame]
308 def getParticleGridIndex(self, leaf):
310 coordinates = xyzDecorator.get_coordinates()
312 extendedIndex = self.grid.get_extended_index(coordinates)
313 if (self.grid.get_has_index(extendedIndex) == 0):
314 if (self.getParam(
"grid_type") ==
"sparse"):
315 self.grid.add_voxel(extendedIndex, 1)
317 self.grid.add_voxel(extendedIndex)
319 index = self.grid.get_index(extendedIndex)
324 def snapToGrid(self, particle):
326 index = self.getParticleGridIndex(particle)
327 center = self.grid.get_center(index)
329 xyzDecorator.set_coordinates(center)
334 def discretizeNativeProtein(self):
336 outputDir = self.getParam(
"output_directory")
337 nativeSnappedFile = self.getParam(
"native_protein_snapped_output_file")
341 self.snapToGrid(leaf)
346 def getPeptideCa(self):
350 peptideIndicesToResidues = {}
351 for residueH
in residues:
353 chainId = chain.get_id()
354 residue = residueH.get_as_residue()
355 if (chainId == self.getParam(
"peptide_chain")):
356 peptideIndicesToResidues[residue.get_index()] = residue
359 minPeptide = min(sorted(peptideIndicesToResidues.keys()))
360 maxPeptide = max(sorted(peptideIndicesToResidues.keys()))
361 centerPeptide = round(((maxPeptide - minPeptide) / 2 + minPeptide), 0)
364 centerName = atomicDominoUtilities.makeParticleName(self.getParam(
"peptide_chain"), int(centerPeptide),
"CA")
365 centerParticle = self.namesToParticles[centerName]
366 return centerParticle
370 def createGrid(self):
374 gridSpacing = float(self.getParam(
"grid_spacing"))
375 bufferSpace = float(self.getParam(
"grid_buffer_space"))
377 protBb += bufferSpace
379 if (self.getParam(
"grid_type") ==
"sparse"):
380 ca = self.getPeptideCa()
390 def initializeParticleStatesTable(self):
393 restrainedParticles = atomicDominoUtilities.getRestrainedParticles(self.protein, self.model, self.namesToParticles)
395 for p
in restrainedParticles:
400 dominoPst.set_particle_states(p, xyzStates)
402 self.dominoPst = dominoPst
404 def createUniqueLeafAssignments(self, particleNameList, particleInfo):
406 size = len(particleInfo[particleNameList[0]])
409 for i
in range(size):
411 for particleName
in particleNameList:
412 dataArray = particleInfo[particleName]
413 [state, center] = dataArray[i]
414 nextAssignment.append(state)
415 allAssignments.append(nextAssignment)
418 uniqueAssignments = {}
419 for assignment
in allAssignments:
422 for index
in assignment:
423 stateString = stateString + str(index) +
"_"
424 uniqueAssignments[stateString] = assignment
425 finalAssignments = []
428 for stateString
in uniqueAssignments.keys():
429 assignmentList = uniqueAssignments[stateString]
431 finalAssignments.append(assignment)
432 return finalAssignments
436 def readMdTrajectory(self, atomList, flexibleAtoms):
439 outputDir = self.getParam(
"output_directory")
440 trajectoryFile = self.getParam(
"md_trajectory_output_file")
441 fullFile = os.path.join(outputDir, trajectoryFile)
442 rh = RMF.open_rmf_file(fullFile)
443 IMP.rmf.set_hierarchies(rh, [self.protein])
444 framesToRead = atomicDominoUtilities.getMdIntervalFrames(rh, int(self.getParam(
"md_interval")), self.protein)
445 print "preparing to read md frames %s" % framesToRead
448 particleStatesSeen = {}
449 for atomName
in atomList:
450 particle = self.namesToParticles[atomName]
451 particleInfo[atomName] = []
452 particleStatesSeen[atomName] = {}
454 self.particleStatesSeen = particleStatesSeen
455 self.particleInfo = particleInfo
458 mdScoreOutputFile = os.path.join(outputDir,
"%s" % self.getParam(
"md_score_output_file"))
459 [bestMdScore, bestScoreFrame, bestRmsd, bestRmsdFrame] = self.readTrajectoryFile(atomList, rh, framesToRead, mdScoreOutputFile, 0, flexibleAtoms)
463 self.singlePdbResults(fullFile, bestScoreFrame, self.getParam(
"best_md_score_output_file"))
466 self.singlePdbResults(fullFile, bestRmsdFrame, self.getParam(
"best_md_rmsd_output_file"))
467 self.singlePdbResults(fullFile, -1, self.getParam(
"final_md_frame_output_file"))
469 self.bestMdScore = round(bestMdScore, 2)
470 self.bestMdRmsd = round(bestRmsd, 2)
471 self.bestMdScoreFrame = bestScoreFrame
472 self.bestMdRmsdFrame = bestRmsdFrame
474 def readCgTrajectories(self, atomList, flexibleAtoms):
476 cgFileName = self.getParam(
"cg_output_file")
477 bestCgScore = 10000000
479 bestCgRmsd = 10000000
482 outputDir = self.getParam(
"output_directory")
483 trajectoryFile = self.getParam(
"md_trajectory_output_file")
484 fullFile = os.path.join(outputDir, trajectoryFile)
485 rh = RMF.open_rmf_file(fullFile)
486 IMP.rmf.set_hierarchies(rh, [self.protein])
487 framesToRead = atomicDominoUtilities.getMdIntervalFrames(rh, int(self.getParam(
"cg_interval")), self.protein)
489 skipCgDomino = int(self.getParam(
"skip_cg_domino"))
491 if (len(framesToRead) > 0):
492 for cgNumber
in framesToRead:
494 outputDir = self.getParam(
"output_directory")
495 fullCgFileName = os.path.join(outputDir,
"%s%s" % (cgFileName, cgNumber))
496 rh = RMF.open_rmf_file(fullCgFileName)
497 IMP.rmf.set_hierarchies(rh, [self.protein])
500 frameCount = IMP.rmf.get_number_of_frames(rh, self.protein)
503 if (frameCount > 20):
504 startFrameCount = frameCount - 20
506 for i
in range(startFrameCount, frameCount):
510 cgScoreOutputFile = os.path.join(outputDir,
"%s%s" % (self.getParam(
"cg_score_output_file"), cgNumber))
511 [cgScore, cgScoreFrame, cgRmsd, cgRmsdFrame] = self.readTrajectoryFile(atomList, rh, cgFrames, cgScoreOutputFile, skipCgDomino, flexibleAtoms)
512 print "cg number %s rmsd %s score %s" % (cgNumber, cgRmsd, cgScore)
514 if (cgScore < bestCgScore):
515 bestCgScore = cgScore
516 bestCgScoreFile = fullCgFileName
517 if (cgRmsd < bestCgRmsd):
519 bestCgRmsdFile = fullCgFileName
522 self.singlePdbResults(bestCgScoreFile, -1, self.getParam(
"best_cg_score_output_file"))
523 self.singlePdbResults(bestCgRmsdFile, -1, self.getParam(
"best_cg_rmsd_output_file"))
524 self.singlePdbResults(
"%s%s" % (cgFileName, framesToRead[-1]), -1, self.getParam(
"final_cg_frame_output_file"))
525 finalCgRmsd = self.calculateNativeRmsd(flexibleAtoms)
526 print "final cg rmsd is %s " % finalCgRmsd
527 self.bestCgScore = round(bestCgScore, 2)
528 self.bestCgRmsd = round(bestCgRmsd, 2)
529 self.bestCgScoreFile = bestCgScoreFile
530 self.bestCgRmsdFile = bestCgRmsdFile
533 def singlePdbResults(self, trajectoryFile, frame, outputPdbFile):
535 fullTrajectoryFile = os.path.join(self.getParam(
"output_directory"), trajectoryFile)
536 fullOutputFile = os.path.join(self.getParam(
"output_directory"), outputPdbFile)
537 rh = RMF.open_rmf_file(fullTrajectoryFile)
538 IMP.rmf.set_hierarchies(rh, [self.protein])
540 frame = IMP.rmf.get_number_of_frames(rh, self.protein) - 1
545 def calculateRmsd(self, otherProtein, flexibleAtoms):
546 otherNamesToParticles = atomicDominoUtilities.makeNamesToParticles(otherProtein)
549 for pName
in otherNamesToParticles.keys():
550 if (flexibleAtoms.has_key(pName) == 0):
552 otherParticle = otherNamesToParticles[pName]
553 modelParticle = self.namesToParticles[pName]
559 def calculateNativeRmsd(self, flexibleAtoms):
561 if (self.wroteNativeProtein == 0):
562 pdbName = self.getParam(
"native_pdb_input_file")
565 self.wroteNativeProtein = 1
567 return self.calculateRmsd(self.nativeProtein, flexibleAtoms)
569 def calculateTrajectoryRmsd(self, trajectoryFile, trajectoryFrame, flexibleAtoms):
570 pdbName = self.getParam(
"native_pdb_input_file")
573 outputDir = self.getParam(
"output_directory")
574 fullFile = os.path.join(outputDir, trajectoryFile)
575 print "open calculate traj rmf %s" % fullFile
576 rh = RMF.open_rmf_file(fullFile)
577 IMP.rmf.set_hierarchies(rh, [otherProtein])
578 if (trajectoryFrame == -1):
579 trajectoryFrame = IMP.rmf.get_number_of_frames(rh, otherProtein) - 1
581 return self.calculateRmsd(otherProtein, flexibleAtoms)
584 def createAllSubsetAssignments(self):
589 leafNodeIndexList = self.getLeafNodeIndexList()
591 for nodeIndex
in leafNodeIndexList:
593 subset = self.mt.get_vertex_name(nodeIndex)
594 particleNameList = []
595 for particle
in subset:
596 particleNameList.append(self.quickParticleName(particle))
597 print "creating initial assignments for leaf %s" % nodeIndex
599 assignments = self.createUniqueLeafAssignments(particleNameList, self.particleInfo)
600 filteredAssignments = self.filterAssignments(assignments, subset, nodeIndex, rssft)
604 for assignment
in filteredAssignments:
605 packedAssignmentContainer.add_assignment(assignment)
606 lat.set_assignments(subset, packedAssignmentContainer)
612 root = self.mt.get_vertices()[-1]
613 completeAc = self.loadAssignments(root)
614 self.completeAc = completeAc
616 def loadAssignments(self, nodeIndex):
618 children = self.mt.get_out_neighbors(nodeIndex)
619 subset = self.mt.get_vertex_name(nodeIndex)
620 heapCount = int(self.getParam(
"heap_count"))
623 print "computing assignments for leaf %s" % nodeIndex
625 self.sampler.load_vertex_assignments(nodeIndex, mine)
626 print "leaf node %s has %s leaf assignments" % (nodeIndex, mine.get_number_of_assignments())
628 if (children[0] > children[1]):
629 children = [children[1], children[0]]
631 firstAc = self.loadAssignments(children[0])
632 secondAc = self.loadAssignments(children[1])
634 self.sampler.load_vertex_assignments(nodeIndex, firstAc, secondAc, mine)
636 timeDifference = self.logTimePoint(0)
637 print "Done Parent %s Assignments %s first child %s second child %s time %s" % (nodeIndex, mine.get_number_of_assignments(), firstAc.get_number_of_assignments(),
638 secondAc.get_number_of_assignments(), timeDifference)
639 self.totalAssignments += mine.get_number_of_assignments()
646 def writeOutput(self, flexibleAtoms, startTime):
648 bestDominoScore = 100000
650 finalAssignments = self.completeAc.get_assignments()
651 for assignment
in finalAssignments:
653 score = self.model.evaluate(
False)
654 if (score < bestDominoScore):
655 bestAssignment = assignment
656 bestDominoScore = round(score, 2)
657 print "best domino score is %s " % bestDominoScore
658 print "best md score is %s" % self.bestMdScore
659 print "best md rmsd is %s" % self.bestMdRmsd
660 print "best cg score is %s" % self.bestCgScore
661 print "best cg rmsd is %s" % self.bestCgRmsd
662 print "merge tree contained %s total assignments" % self.totalAssignments
665 dominoVsMdRmsd = round(self.calculateTrajectoryRmsd(self.getParam(
"md_trajectory_output_file"), self.bestMdScoreFrame, flexibleAtoms), 2)
668 IMP.atom.write_pdb(self.protein, os.path.join(self.getParam(
"output_directory"), self.getParam(
"minimum_domino_score_pdb")))
670 dominoVsCgRmsd = round(self.calculateTrajectoryRmsd(self.bestCgScoreFile, -1, flexibleAtoms), 2)
671 dominoMinimizedScore = round(self.model.evaluate(
False), 2)
672 dominoRmsd = round(self.calculateNativeRmsd(flexibleAtoms), 2)
674 runTime = round(time.time() - startTime, 2)
675 print "final domino score (after cg): %s" % dominoMinimizedScore
676 print "final domino rmsd: %s" % dominoRmsd
677 print "best domino rmsd with best md score: %s" % dominoVsMdRmsd
678 print "domino rmsd with best cg score: %s" % dominoVsCgRmsd
679 print "Final Results\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (self.bestMdScore, self.bestMdRmsd, self.bestCgScore, self.bestCgRmsd, bestDominoScore, dominoRmsd, dominoMinimizedScore, dominoVsCgRmsd, self.totalAssignments, self.maxMem, runTime)
684 def createSubsetFromParticles(self, particleNames):
685 particleNameList = particleNames.split(
" ")
687 for particleName
in particleNameList:
688 particle = self.namesToParticles[particleName]
689 particleList.append(particle)
692 return [subset, particleList]
694 def createHdf5AssignmentContainer(self, index, particleNames, read):
695 root = self.getAssignmentContainerRoot(index, read)
696 print "got root for index %s" % index
699 dataset = root.get_child_index_data_set_2d(str(index))
701 dataset = root.add_child_index_data_set_2d(str(index))
702 print "added child index dataset"
704 [subset, particleOrder] = self.createSubsetFromParticles(particleNames)
705 print "created subset for index %s" % index
706 hdf5Ac = IMP.domino.create_assignments_container(dataset, subset, particleOrder)
707 print "returning from create"
709 for nextInt
in order:
710 print "next int is %s" % nextInt
711 return [subset, hdf5Ac]
713 def loadAssignmentsParallel(self, nodeIndex, particleInfo, mtIndexToNodeInfo, mtIndexToSubsetOrder, mtIndexToParticles):
716 if (mtIndexToNodeInfo[nodeIndex].has_key(
"firstChild") == 0):
717 print "writing file for leaf index %s" % nodeIndex
718 return self.createAssignmentsParallel(particleInfo, nodeIndex, mtIndexToParticles)
721 beginTime = self.logTimePoint(1)
722 firstChildIndex = mtIndexToNodeInfo[nodeIndex][
"firstChild"]
723 [firstSubset, firstAc] = self.createHdf5AssignmentContainer(firstChildIndex, mtIndexToSubsetOrder[firstChildIndex], 1)
724 firstAcCreateTime = self.logTimePoint(0)
726 secondChildIndex = mtIndexToNodeInfo[nodeIndex][
"secondChild"]
727 [secondSubset, secondAc] = self.createHdf5AssignmentContainer(secondChildIndex, mtIndexToSubsetOrder[secondChildIndex], 1)
728 secondAcCreateTime = self.logTimePoint(0)
730 print "getting assignments for nodeIndex %s first child %s second child %s" % (nodeIndex, firstChildIndex, secondChildIndex)
731 firstChildParticles = mtIndexToSubsetOrder[firstChildIndex]
732 secondChildParticles = mtIndexToSubsetOrder[secondChildIndex]
733 myParticles = mtIndexToParticles[nodeIndex]
735 print "first child particles %s\nsecond child particles %s\nmy particles; %s\n" % (firstChildParticles, secondChildParticles, myParticles)
736 for p
in firstSubset:
737 print "next particle in first subset: %s" % self.quickParticleName(p)
740 for p
in secondSubset:
741 print "next particle in second subset: %s" % self.quickParticleName(p)
743 for assignment
in firstAc.get_assignments():
744 print "next assignment for first child %s: %s" % (firstChildIndex, assignment)
747 for assignment
in secondAc.get_assignments():
748 print "next assignment for second child %s: %s" % (secondChildIndex, assignment)
750 [mySubset, myAc] = self.createHdf5AssignmentContainer(nodeIndex, mtIndexToParticles[nodeIndex], 0)
751 print "done creating hdf5"
752 prepTime = self.logTimePoint(0)
754 rssf = rssft.get_subset_filter(mySubset, [])
760 heapTime = self.logTimePoint(0)
762 addTime = self.logTimePoint(0)
763 for assignment
in myAc.get_assignments():
764 print "loadAssignmentsParallel next assignment for %s: %s" % (nodeIndex, assignment)
765 doneTime = self.logTimePoint(0)
766 firstChildCount = firstAc.get_number_of_assignments()
767 secondChildCount = secondAc.get_number_of_assignments()
768 print "first count: %s second count: %s begin: %s firstAc: %s secondAc: %s prep: %s heap: %s add: %s done: %s" % (firstChildCount, secondChildCount, beginTime, firstAcCreateTime, secondAcCreateTime, prepTime, heapTime, addTime, doneTime)
769 subsetOrder = self.getSubsetOrderList(mySubset)
773 def createAssignmentsParallel(self, particleInfo, nodeIndex, mtIndexToParticles):
776 subsetName = mtIndexToParticles[nodeIndex]
777 print "starting assignments parallel leaf index %s subset name %s" % (nodeIndex, subsetName)
778 [subset, particleList] = self.createSubsetFromParticles(subsetName)
781 particleNameList = subsetName.split(
" ")
785 finalAssignments = self.createUniqueLeafAssignments(particleNameList, particleInfo)
794 filteredAssignments = self.filterAssignments(finalAssignments, subset, nodeIndex, rssft)
795 root = self.getAssignmentContainerRoot(nodeIndex, 0)
796 dataset= root.add_child_index_data_set_2d(str(nodeIndex))
797 dataset.set_size([0, len(subset)])
799 hdf5AssignmentContainer = IMP.domino.create_assignments_container(dataset, subset, particleOrder)
800 for assignment
in filteredAssignments:
801 hdf5AssignmentContainer.add_assignment(assignment)
802 for assignment
in hdf5AssignmentContainer.get_assignments():
803 print "hdf5 assignment container node %s next assignmet %s" % (nodeIndex, assignment)
806 subsetOrder = self.getSubsetOrderList(subset)
807 print "leaf node returning with order %s" % subsetOrder
816 def writePymolData(self):
818 outputDir = self.getParam(
"output_directory")
820 pymolInteractions = self.getParam(
"pymol_interactions_file")
826 pymolSubsets = self.getParam(
"pymol_subsets_file")
834 def cleanVertexName(self, vertexName):
836 nodeRe = re.compile(
'Subset\(\[(.*?)\s*\]')
837 secondNodeRe = re.compile(
'\[(.*?)\s*\]')
838 node = nodeRe.search(str(vertexName))
839 secondNode = secondNodeRe.search(str(vertexName))
843 foundName = node.group(1)
845 foundName = secondNode.group(1)
846 vertexNameFinal = foundName.replace(
'"',
'')
847 return vertexNameFinal
849 def getSubsetOrderList(self, subset):
851 for particle
in subset:
852 name = self.quickParticleName(particle)
853 subsetOrderList.append(name)
854 subsetOrder =
" ".join(subsetOrderList)
857 def checkAssignments(self, subset, nodeIndex, particleOrder):
858 print "reading back in assignments for leaf index %s" % nodeIndex
859 root = self.getAssignmentContainerRoot(nodeIndex, 1)
860 dataset = root.get_child_index_data_set_2d(str(nodeIndex))
861 hdf5 = IMP.domino.create_assignments_container(dataset, subset, particleOrder)
862 for assignment
in hdf5.get_assignments():
863 print "leaf index %s read back in %s" % (nodeIndex, assignment)
865 def createSamplerLite(self):
868 if (self.getParam(
"cross_subset_filtering") == 1):
869 s.set_use_cross_subset_filtering(1)
873 def createSiblingMap(self, parentIndex):
875 children = self.mt.get_out_neighbors(parentIndex)
876 if (len(children) > 0):
877 firstChild = children[0]
878 secondChild = children[1]
879 self.parentSiblingMap[firstChild] = {}
880 self.parentSiblingMap[firstChild][
"sibling"] = secondChild
881 self.parentSiblingMap[firstChild][
"parent"] = parentIndex
883 self.parentSiblingMap[secondChild] = {}
884 self.parentSiblingMap[secondChild][
"sibling"] = firstChild
885 self.parentSiblingMap[secondChild][
"parent"] = parentIndex
886 print "created map for parent %s first child %s second child %s" % (parentIndex, firstChild, secondChild)
888 self.parentSiblingMap[parentIndex][
"firstChild"] = firstChild
889 self.parentSiblingMap[parentIndex][
"secondChild"] = secondChild
891 self.createSiblingMap(firstChild)
892 self.createSiblingMap(secondChild)
894 def getMtIndexToNodeInfo(self):
895 return self.parentSiblingMap
897 def getLeafParentNodeIndexList(self):
898 leafParentNodeIndexList = {}
899 leafNodeIndexList = self.getLeafNodeIndexList()
900 for leafIndex
in leafNodeIndexList:
901 parent = self.parentSiblingMap[leafIndex][
"parent"]
902 leafParentNodeIndexList[parent] = 1
903 return leafParentNodeIndexList
905 def getMtIndexToNameList(selt):
907 for index
in self.mt.get_vertices():
908 name = self.mt.get_vertex_name(index)
909 mtIndexToNames[index] = name
910 return mtIndexToNames
913 def getAssignmentContainerRoot(self, subsetIndex, read):
914 outputDir = self.getParam(
"output_directory")
915 filePrefix = self.getParam(
"subset_assignment_output_file")
916 assignmentFileName = os.path.join(outputDir,
"%s%s" % (filePrefix, subsetIndex))
917 print "creating hdf5 file with name %s" % assignmentFileName
920 root = RMF.open_hdf5_file(assignmentFileName)
922 root= RMF.create_hdf5_file(assignmentFileName)
929 def writeVisualization(self):
931 self.writeCytoscapeIgInput()
932 self.writeCytoscapeJtInput()
933 self.writeCytoscapeMtInput()
934 self.writeCytoscapeScripts()
935 self.writePymolData()
937 def writeCytoscapeScripts(self):
938 outputDir = self.getParam(
"output_directory")
939 mTreeCytoscapeInput = self.getParam(
"mtree_cytoscape_input_file")
940 mTreeCytoscapeAssignments = self.getParam(
"mtree_cytoscape_assignment_file")
941 mTreeCytoscapeAtomChains = self.getParam(
"mtree_cytoscape_atom_chain_file")
942 mTreeCytoscapeAtomSummary = self.getParam(
"mtree_cytoscape_atom_summary_file")
944 mTreeCytoscapeScript = self.getParam(
"mtree_cytoscape_script")
945 mTreeFh = open(os.path.join(outputDir, mTreeCytoscapeScript),
'w')
946 mTreeFh.write(
"network import file=%s\n" % os.path.join(outputDir, mTreeCytoscapeInput))
947 mTreeFh.write(
"node import attributes file=\"%s\"\n" % os.path.join(outputDir, mTreeCytoscapeAssignments))
948 mTreeFh.write(
"node import attributes file=\"%s\"\n" % os.path.join(outputDir, mTreeCytoscapeAtomSummary))
949 mTreeFh.write(
"node import attributes file=\"%s\"\n" % os.path.join(outputDir, mTreeCytoscapeAtomChains))
950 mTreeFh.write(
"layout jgraph-tree\n")
953 def getGraphStructure(self, graph, fileName, separator):
955 graphLogWrite = open(fileName,
'w')
956 graph.show(graphLogWrite)
957 graphLogWrite.close()
960 graphLogRead = open(fileName,
'r')
962 nodeRe = re.compile('^(\d+)\[label\=\"\[*(.*?)\s*\]*\"')
963 separatorEscape =
"\\" + separator
964 edgeString =
"^(\d+)\-%s(\d+)" % separatorEscape
965 edgeRe = re.compile(edgeString)
970 for line
in graphLogRead:
973 node = nodeRe.search(line)
975 nodeNumber = node.group(1)
976 atomString = node.group(2)
977 nodesToNames[nodeNumber] = atomString
981 edge = edgeRe.search(line)
983 firstNode = edge.group(1)
984 secondNode = edge.group(2)
986 if (nodesToNodes.has_key(firstNode)):
987 firstNodeDict = nodesToNodes[firstNode]
988 firstNodeDict[secondNode] = 1
989 nodesToNodes[firstNode] = firstNodeDict
991 return [nodesToNames, nodesToNodes]
993 def writeEdgeFile(self, nodesToNodes, edgeFileName):
995 outputDir = self.getParam(
"output_directory")
996 graphInputFile = open(os.path.join(outputDir, edgeFileName),
'w')
997 for firstNode
in nodesToNodes.keys():
998 nodeDict = nodesToNodes[firstNode]
999 for secondNode
in nodeDict.keys():
1000 graphInputFile.write(
"%s ttt %s\n" % (firstNode, secondNode))
1001 graphInputFile.close()
1003 def writeCytoscapeIgInput(self):
1004 outputDir = self.getParam(
"output_directory")
1005 igOutputFile = self.getParam(
"ig_output_file")
1006 [nodesToNames, nodesToNodes] = self.getGraphStructure(self.ig, os.path.join(outputDir, igOutputFile),
"-")
1008 self.writeEdgeFile(nodesToNodes, self.getParam(
"ig_cytoscape_input_file"))
1011 igResiduesFile = self.getParam(
"ig_cytoscape_residues_file")
1012 peptideChain = self.getParam(
"peptide_chain")
1014 subsetResidueFile = open(os.path.join(outputDir, igResiduesFile),
'w')
1015 subsetResidueFile.write(
"ResidueNumber\n")
1016 for nodeNumber
in nodesToNames.keys():
1017 nodeName = nodesToNames[nodeNumber]
1019 [nodeChain, residueNumber, nodeAtom] = atomicDominoUtilities.getAtomInfoFromName(nodeName)
1020 if (nodeChain == peptideChain):
1021 subsetResidueFile.write(
"%s = %s\n" % (nodeNumber, residueNumber))
1023 subsetResidueFile.write(
"%s = 100\n" % nodeNumber)
1026 def writeCytoscapeMtInput(self):
1027 outputDir = self.getParam(
"output_directory")
1028 mTreeOutputFile = self.getParam(
"mtree_output_file")
1029 [nodesToNames, nodesToNodes] = self.getGraphStructure(self.mt, os.path.join(outputDir, mTreeOutputFile),
">")
1031 self.writeEdgeFile(nodesToNodes, self.getParam(
"mtree_cytoscape_input_file"))
1032 self.writeNodeNameAttributes(nodesToNames, self.getParam(
"mtree_cytoscape_atom_name_file"), self.getParam(
"mtree_cytoscape_atom_summary_file"),
1033 self.getParam(
"mtree_cytoscape_atom_chain_file"))
1035 def writeCytoscapeJtInput(self):
1037 outputDir = self.getParam(
"output_directory")
1038 jTreeOutputFile = self.getParam(
"jtree_output_file")
1039 [nodesToNames, nodesToNodes] = self.getGraphStructure(self.jt, os.path.join(outputDir, jTreeOutputFile),
"-")
1041 self.writeEdgeFile(nodesToNodes, self.getParam(
"jtree_cytoscape_input_file"))
1043 self.writeNodeNameAttributes(nodesToNames, self.getParam(
"jtree_cytoscape_atom_name_file"), self.getParam(
"jtree_cytoscape_atom_summary_file"),
1044 self.getParam(
"jtree_cytoscape_atom_chain_file"))
1047 jtreeCytoscapeEdgeFile = self.getParam(
"jtree_cytoscape_edge_file")
1048 edgeWeightFile = open(os.path.join(outputDir, jtreeCytoscapeEdgeFile),
'w')
1049 edgeWeightFile.write(
"SubsetOverlap (class=Integer)\n")
1050 for firstNode
in nodesToNodes.keys():
1051 nodeDict = nodesToNodes[firstNode]
1052 for secondNode
in nodeDict.keys():
1053 firstNodeAtoms = nodesToNames[firstNode].split(
" ")
1054 secondNodeAtoms = nodesToNames[secondNode].split(
" ")
1055 intersection = [val
for val
in firstNodeAtoms
if val
in secondNodeAtoms]
1056 edgeWeightFile.write(
"%s (pp) %s = %s\n" % (firstNode, secondNode, len(intersection)))
1057 edgeWeightFile.close()
1059 def getAtomTypeCounts(self, atomNames):
1061 atomNames = atomNames.lstrip(
'[')
1062 atomNames = atomNames.rstrip(
']')
1063 atomNames = atomNames.rstrip()
1065 atomList = atomNames.split(
" ")
1066 peptideAtomCount = 0
1067 proteinAtomCount = 0
1068 for atom
in atomList:
1069 [chain, residue, atom] = atomicDominoUtilities.getAtomInfoFromName(atom)
1070 if (chain == self.getParam(
"peptide_chain")):
1071 peptideAtomCount += 1
1073 proteinAtomCount += 1
1074 return [peptideAtomCount, proteinAtomCount]
1077 def writeNodeNameAttributes(self, nodesToNames, atomNameFile, atomSummaryFile, atomChainFile):
1079 outputDir = self.getParam(
"output_directory")
1080 subsetAtomNameFile = open(os.path.join(outputDir, atomNameFile),
'w')
1081 subsetAtomSummaryFile = open(os.path.join(outputDir, atomSummaryFile),
'w')
1082 subsetAtomChainFile = open(os.path.join(outputDir, atomChainFile),
'w')
1084 subsetAtomNameFile.write(
"Atom names\n")
1085 subsetAtomSummaryFile.write(
"Atom Summary\n")
1086 subsetAtomChainFile.write(
"Atom chain\n")
1087 for node
in nodesToNames.keys():
1088 atomNames = nodesToNames[node]
1089 subsetAtomNameFile.write(
"%s = %s\n" % (node, atomNames))
1090 [peptideAtomCount, proteinAtomCount] = self.getAtomTypeCounts(atomNames)
1092 subsetAtomSummaryFile.write(
"%s = %sp %sl\n" % (node, proteinAtomCount, peptideAtomCount))
1095 if (proteinAtomCount == 0):
1096 subsetAtomChainFile.write(
"%s = 1\n" % node)
1097 elif (peptideAtomCount == 0):
1098 subsetAtomChainFile.write(
"%s = 2\n" % node)
1100 subsetAtomChainFile.write(
"%s = 3\n" % node)
1102 subsetAtomChainFile.close()
1103 subsetAtomSummaryFile.close()
1104 subsetAtomNameFile.close()