doc/html/atomicDomino_8py_source.html

import IMP

import subprocess

import random

import IMP.domino

import IMP.core

import IMP.rmf

import RMF

import time

import IMP.algebra

import types

import re

import sys

import operator

import os

import resource

import atomicDominoUtilities


class AtomicDomino:


    def __init__(self, model, protein, parameterFileName):
        """Bind the model and protein and load the run parameters.

        model -- stored as self.model.
        protein -- stored as self.protein; also handed to
            atomicDominoUtilities.makeNamesToParticles to build the
            name -> particle lookup kept in self.namesToParticles.
        parameterFileName -- forwarded to readParameters().
        """
        self.model, self.protein = model, protein
        self.namesToParticles = atomicDominoUtilities.makeNamesToParticles(protein)
        self.readParameters(parameterFileName)
        # Checked by calculateNativeRmsd() before it reads the native PDB.
        self.wroteNativeProtein = 0
        # Largest value recorded so far by logMemory().
        self.maxMem = 0


    def readParameters(self, parameterFileName):
        """Load the parameters in parameterFileName into self.parameters."""
        loadedParameters = atomicDominoUtilities.readParameters(parameterFileName)
        self.parameters = loadedParameters


    def logMemory(self):

        currentMem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        currentMem *= 10**-6

        print "log memory: %s" % currentMem

        if (currentMem > self.maxMem):

            self.maxMem = currentMem


    #Get parameter value. Throws dictionary exception if not found

    def getParam(self, paramName):

        paramValue = self.parameters[paramName]

        return paramValue


    def loadDominoHelpers(self):

        self.subsetStateScores = {}

        self.subsetRestraintScores = {}

        self.subsetStateFailures = {}

        self.nodesToAssignmentCount = {}


        self.totalAssignments = 0

        outputDir = self.getParam("output_directory")

        mtreeCytoscapeAssignmentFile = self.getParam("mtree_cytoscape_assignment_file")

        self.mtreeCytoscapeAssignmentFh = open(os.path.join(outputDir, mtreeCytoscapeAssignmentFile), 'w')

        self.mtreeCytoscapeAssignmentFh.write("Assignments\n")


        self.mtNamesToIndices = {}


        self.subsetNamesToAssignmentFiles = {}


    #Simple way to calculate run-time for certain methods and procedures.

    #When reset is 0, compares the previous saved time to the current one

    #and returns the difference (saving the current time too).

    #When reset is 1, just saves the current time.

    def logTimePoint(self, reset):

        newTime = time.time()

        if (reset == 0):

            timeDifference = newTime - self.timePoint

            timeDifference = round(timeDifference, 0)

            self.timePoint = newTime

            return timeDifference

        else:


            self.timePoint = newTime

            return newTime


    def createParticleStatesTable(self):
        """Register the states collected in self.particleInfo with domino.

        For each particle name, the [state, center] entries are collapsed to
        one center per state index, ordered by state index, converted to
        Vector3D objects and installed in self.dominoPst as XYZStates.
        """
        for particleName in self.particleInfo.keys():
            # A later entry for the same state index replaces an earlier one.
            centersByState = {}
            for state, center in self.particleInfo[particleName]:
                centersByState[state] = center

            orderedCenters = [IMP.algebra.Vector3D(centersByState[stateIndex])
                              for stateIndex in sorted(centersByState)]
            xyzStates = IMP.domino.XYZStates(orderedCenters)
            self.dominoPst.set_particle_states(
                self.namesToParticles[particleName], xyzStates)


    def quickParticleName(self, particle):
        """Return the name atomicDominoUtilities.quickParticleName gives particle."""
        shortName = atomicDominoUtilities.quickParticleName(particle)
        return shortName


    def filterAssignments(self, assignments, subset, nodeIndex, rssft):
        """Return the assignments accepted by the subset filter from rssft.

        assignments -- iterable of assignments for subset.
        subset -- the subset the assignments belong to.
        nodeIndex -- not used by this method; kept for interface
            compatibility.
        rssft -- filter table; rssft.get_subset_filter(subset, []) supplies
            the filter. When that filter is None every assignment is kept.

        Prints how many assignments passed. When none pass (including when
        assignments is empty) a message is printed and sys.exit() is called.
        """
        # NOTE(review): this dependency graph was built "for stats" but its
        # result is never read; the calls are kept so that nothing the IMP
        # calls may do is lost. Confirm whether they can be removed.
        restraintList = list(IMP.get_restraints([self.model.get_root_restraint_set()]))
        IMP.get_dependency_graph(restraintList)

        sFilter = rssft.get_subset_filter(subset, [])

        stateCounter = 0
        filteredAssignments = []
        for assignment in assignments:
            stateCounter += 1
            if sFilter is None or sFilter.get_is_ok(assignment):
                filteredAssignments.append(assignment)
        passedCounter = len(filteredAssignments)

        # Guard the ratio: with no input states the division would raise
        # ZeroDivisionError before the diagnostic below could be printed.
        if stateCounter > 0:
            fraction = (passedCounter * 1.0) / (stateCounter * 1.0)
        else:
            fraction = 0.0
        print("%s states passed out of %s total for this subset (fraction %s)" % (passedCounter, stateCounter, fraction))

        if passedCounter == 0:
            print("subset %s had 0 assignments (out of %s) pass. Exiting..." % (subset, stateCounter))
            sys.exit()

        return filteredAssignments


    #Convert the name of the subset to something more readable. Currently returning name as sorted list of atoms

    def quickSubsetName(self, subset):

        cleanName = self.cleanVertexName(subset)

        atomNameList = cleanName.split(" ")

        sortedList = sorted(atomNameList)


        name = " ".join(sortedList)

        return name


    def getNodeIndexList(self):
        """Return all vertex indices of the merge tree (self.mt)."""
        mergeTree = self.mt
        return mergeTree.get_vertices()


    def getLeafNodeIndexList(self):
        """Return the merge tree index of every subset in self.subsets.

        The indices are listed in the order of self.subsets.
        """
        return [self.getMtIndexForSubset(self.quickSubsetName(subset))
                for subset in self.subsets]


    #Create Domino sampler and give it all the other domino objects it needs

    def createSampler(self):
        """Build the DominoSampler and store it as self.sampler.

        Uses self.model, self.dominoPst, self.mt, self.rssft and self.lat,
        which must have been created beforehand. Cross-subset filtering is
        switched on when the "cross_subset_filtering" parameter equals 1.
        """
        sampler = IMP.domino.DominoSampler(self.model, self.dominoPst)

        sampler.set_merge_tree(self.mt)
        sampler.set_subset_filter_tables([self.rssft])
        sampler.set_assignments_table(self.lat)

        # The other numeric parameters in this class are converted with
        # int()/float() before use; convert here as well so a textual "1"
        # is not silently treated as "off" by a str-vs-int comparison.
        if int(self.getParam("cross_subset_filtering")) == 1:
            sampler.set_use_cross_subset_filtering(1)

        self.sampler = sampler


    #Use the model restraint set to get the interaction graph, junction tree, and merge tree, and also get subsets

    #from the junction tree and return them

    def createSubsets(self):
        """Build the interaction graph, junction tree, merge tree and subsets.

        Results are stored on self.ig, self.jt, self.mt and self.subsets.
        Also fills self.mtNamesToIndices (cleaned vertex name -> merge tree
        index) and self.subsetNamesToSubsets, then starts the sibling map
        from the last merge tree vertex.
        """
        self.initializeParticleStatesTable()

        rootRestraints = [self.model.get_root_restraint_set()]
        interactionGraph = IMP.domino.get_interaction_graph(rootRestraints, self.dominoPst)
        print("interaction graph:")
        interactionGraph.show()

        junctionTree = IMP.domino.get_junction_tree(interactionGraph)
        print("junction tree:")
        junctionTree.show()

        self.subsetNamesToSubsets = {}
        mergeTree = IMP.domino.get_balanced_merge_tree(junctionTree)

        # Map each cleaned merge tree vertex name to its vertex index.
        for vertexIndex in mergeTree.get_vertices():
            vertexName = self.cleanVertexName(mergeTree.get_vertex_name(vertexIndex))
            self.mtNamesToIndices[vertexName] = vertexIndex

        junctionSubsets = IMP.domino.get_subsets(junctionTree)
        for subset in junctionSubsets:
            print("created subset %s" % subset)
            self.subsetNamesToSubsets[self.quickSubsetName(subset)] = subset

        print("merge tree:")
        mergeTree.show()

        self.ig = interactionGraph
        self.jt = junctionTree
        self.mt = mergeTree
        self.subsets = junctionSubsets

        # The last merge tree vertex is used as the root of the sibling map.
        self.parentSiblingMap = {}
        rootIndex = self.mt.get_vertices()[-1]
        self.parentSiblingMap[rootIndex] = {}
        self.createSiblingMap(rootIndex)


    def getMtIndexForSubset(self, subsetName):
        """Return the merge tree index whose vertex holds the same atom names.

        Both subsetName and each cleaned, stripped vertex name are split on
        single spaces and compared after sorting, so the order of the atom
        names does not matter. When nothing matches a message is printed and
        sys.exit() is called.
        """
        wantedKey = " ".join(sorted(subsetName.split(" ")))
        for index in self.mt.get_vertices():
            vertexName = self.cleanVertexName(self.mt.get_vertex_name(index)).strip()
            vertexKey = " ".join(sorted(vertexName.split(" ")))
            if vertexKey == wantedKey:
                return index

        print("did not find merge tree index for subset name %s" % subsetName)
        sys.exit()


    def getDominoParticleNames(self):
        """Return the quick name of every particle in self.dominoPst."""
        return [self.quickParticleName(particle)
                for particle in self.dominoPst.get_particles()]


    def getSubsetNameList(self):

        subsetNameList = []

        for subset in self.subsets:

            name = self.quickSubsetName(subset)

            subsetNameList.append(name)

        return subsetNameList


    def getMtIndexToParticles(self):
        """Return a dict mapping each merge tree index to its quick subset name.

        Every mapping is also printed as it is created.
        """
        namesByIndex = {}
        for nodeIndex in self.mt.get_vertices():
            particleNames = self.quickSubsetName(self.mt.get_vertex_name(nodeIndex))
            namesByIndex[nodeIndex] = particleNames
            print("createtMtIndexToParticles: index %s is particleList %s " % (nodeIndex, particleNames))
        return namesByIndex


    def readTrajectoryFile(self, atomList, rh, frames, scoreOutputFile, skipDomino, flexibleAtoms):
        """Score the requested frames and optionally record domino states.

        atomList -- names of the atoms whose grid states are recorded.
        rh -- RMF handle passed to IMP.rmf.load_frame.
        frames -- iterable of frame indices to load into self.protein.
        scoreOutputFile -- path of the file that receives one
            "frame<TAB>score<TAB>rmsd" line per loaded frame.
        skipDomino -- when 0, every atom in atomList is snapped to the grid
            and a [state, center] entry is appended to self.particleInfo;
            otherwise the frame is only scored.
        flexibleAtoms -- forwarded to calculateNativeRmsd().

        Returns [bestScore, bestScoreFrame, bestRmsd, bestRmsdFrame].
        Frames that fail to load are reported and skipped.
        """
        bestScore = 100000
        bestRmsd = 10000
        bestScoreFrame = 0
        bestRmsdFrame = 0

        scoreOutputFh = open(scoreOutputFile, 'w')
        # try/finally guarantees the score file is flushed and closed even
        # when scoring raises part-way through the trajectory.
        try:
            for i in frames:
                try:
                    IMP.rmf.load_frame(rh, i, self.protein)
                except Exception:
                    # Deliberate best effort: report the frame and go on.
                    print("execption in loading frame %s" % i)
                    continue

                score = self.model.evaluate(False)
                rmsd = self.calculateNativeRmsd(flexibleAtoms)
                scoreOutputFh.write("%s\t%s\t%s\n" % (i, score, rmsd))

                if score < bestScore:
                    bestScore = score
                    bestScoreFrame = i
                if rmsd < bestRmsd:
                    bestRmsd = rmsd
                    bestRmsdFrame = i

                if skipDomino == 0:
                    for atomName in atomList:
                        particle = self.namesToParticles[atomName]

                        # Snap the atom to its voxel and copy the voxel
                        # center into a plain Python list.
                        gridIndex = self.snapToGrid(particle)
                        center = self.grid.get_center(gridIndex)
                        pythonCenter = [coordinate for coordinate in center]

                        # Grid indices map to state numbers; a grid index not
                        # seen before gets the next number, which is the
                        # current size of the per-atom dictionary.
                        seenStates = self.particleStatesSeen[atomName]
                        if gridIndex not in seenStates:
                            seenStates[gridIndex] = len(seenStates)
                        state = seenStates[gridIndex]
                        self.particleInfo[atomName].append([state, pythonCenter])
                else:
                    print("didn't add domino states due to skip parameters")
        finally:
            scoreOutputFh.close()

        return [bestScore, bestScoreFrame, bestRmsd, bestRmsdFrame]


    #Get the grid index for the given particle. Returns an integer that can be used to get the center

    #of the grid space for discretizing the particle

    def getParticleGridIndex(self, leaf):
        """Return the grid index of the voxel containing the particle.

        When the grid has no voxel at the particle's extended index yet, one
        is added first: with the value 1 for "sparse" grids, without a value
        otherwise.
        """
        coordinates = IMP.core.XYZ.decorate_particle(leaf).get_coordinates()
        extendedIndex = self.grid.get_extended_index(coordinates)

        if not self.grid.get_has_index(extendedIndex):
            # mostly working with sparse grids now
            if self.getParam("grid_type") == "sparse":
                self.grid.add_voxel(extendedIndex, 1)
            else:
                self.grid.add_voxel(extendedIndex)

        return self.grid.get_index(extendedIndex)


    #Set coordinates of this atom to be the center of the grid space containing it (effectively discretizing the system)

    def snapToGrid(self, particle):
        """Move the particle to the center of its voxel; return the grid index."""
        gridIndex = self.getParticleGridIndex(particle)
        voxelCenter = self.grid.get_center(gridIndex)
        IMP.core.XYZ.decorate_particle(particle).set_coordinates(voxelCenter)
        return gridIndex


    #Take initial protein and snap every atom to the center of its gridpoint

    def discretizeNativeProtein(self):
        """Snap every leaf of self.protein to the grid and write the result.

        The PDB is written to the "native_protein_snapped_output_file"
        parameter inside "output_directory".
        """
        snappedPdbPath = os.path.join(
            self.getParam("output_directory"),
            self.getParam("native_protein_snapped_output_file"))

        for leaf in IMP.atom.get_leaves(self.protein):
            self.snapToGrid(leaf)

        IMP.atom.write_pdb(self.protein, snappedPdbPath)


    #Get particle representing the alpha carbon at the center of the peptide

    def getPeptideCa(self):
        """Return the CA particle of the residue in the middle of the peptide.

        The peptide is the chain whose id equals the "peptide_chain"
        parameter. The middle residue index is halfway between the smallest
        and largest residue index of that chain, rounded down.

        Raises ValueError when the chain has no residues, and KeyError when
        self.namesToParticles has no entry for the middle residue's CA name.
        """
        peptideChain = self.getParam("peptide_chain")

        # Collect the residue indices that belong to the peptide chain.
        peptideIndices = []
        for residueH in IMP.atom.get_by_type(self.protein, IMP.atom.RESIDUE_TYPE):
            if IMP.atom.get_chain(residueH).get_id() == peptideChain:
                peptideIndices.append(residueH.get_as_residue().get_index())

        if not peptideIndices:
            raise ValueError("no residues found for peptide chain %s" % peptideChain)

        minPeptide = min(peptideIndices)
        maxPeptide = max(peptideIndices)
        # Floor division makes the documented "rounding down" explicit
        # instead of relying on how "/" treats two integers.
        centerPeptide = (maxPeptide - minPeptide) // 2 + minPeptide

        centerName = atomicDominoUtilities.makeParticleName(peptideChain, int(centerPeptide), "CA")
        return self.namesToParticles[centerName]


    #Create grid object that will be used to create discrete states for each particle

    def createGrid(self):
        """Create the discretization grid and store it as self.grid.

        With grid_type "sparse" the grid is a SparseUnboundedIntGrid3D built
        from the grid spacing and the coordinates of the central peptide CA;
        otherwise it is a DenseDoubleGrid3D over the protein bounding box
        grown by "grid_buffer_space".
        """
        protBb = IMP.atom.get_bounding_box(self.protein)

        gridSpacing = float(self.getParam("grid_spacing"))
        bufferSpace = float(self.getParam("grid_buffer_space"))
        protBb += bufferSpace  # add buffer around grid

        if self.getParam("grid_type") != "sparse":
            self.grid = IMP.algebra.DenseDoubleGrid3D(gridSpacing, protBb)
            return

        caCoordinates = IMP.core.XYZ.decorate_particle(self.getPeptideCa()).get_coordinates()
        self.grid = IMP.algebra.SparseUnboundedIntGrid3D(gridSpacing, caCoordinates)


    #Create Particle States Table and for each particle in the system, add XYZStates with that particle's

    #initial location

    def initializeParticleStatesTable(self):
        """Create self.dominoPst with one state per restrained particle.

        Each particle returned by atomicDominoUtilities.getRestrainedParticles
        gets an XYZStates holding only its current coordinates.
        """
        dominoPst = IMP.domino.ParticleStatesTable()
        restrainedParticles = atomicDominoUtilities.getRestrainedParticles(self.protein, self.model, self.namesToParticles)

        for p in restrainedParticles:
            # One decorator is enough; a second one created here earlier was
            # never read.
            xyz = IMP.core.XYZ(p).get_coordinates()
            dominoPst.set_particle_states(p, IMP.domino.XYZStates([xyz]))

        self.dominoPst = dominoPst


    def createUniqueLeafAssignments(self, particleNameList, particleInfo):
        """Return the distinct assignments found across the recorded steps.

        particleNameList -- particle names, in the order the states appear
            in each assignment.
        particleInfo -- maps a particle name to its list of [state, center]
            entries, one per recorded step; the number of steps is taken
            from the first particle in particleNameList.

        Returns a list of IMP.domino.Assignment objects, one for each
        distinct combination of states.
        """
        stepCount = len(particleInfo[particleNameList[0]])

        # Key every step's state list by its "s1_s2_..._" string so that
        # repeated combinations collapse into a single entry.
        uniqueAssignments = {}
        for step in range(stepCount):
            states = []
            for particleName in particleNameList:
                state, _center = particleInfo[particleName][step]
                states.append(state)
            stateString = "".join(str(state) + "_" for state in states)
            uniqueAssignments[stateString] = states

        return [IMP.domino.Assignment(uniqueAssignments[stateString])
                for stateString in uniqueAssignments.keys()]


    #Read MD trajectory; for each particle, save all unique states, and for each subset, save all assignments

    def readMdTrajectory(self, atomList, flexibleAtoms):
        """Read the MD trajectory, record particle states and best frames.

        atomList -- names of the atoms whose states are tracked; each name
            must be a key of self.namesToParticles.
        flexibleAtoms -- forwarded to readTrajectoryFile().

        Resets self.particleInfo and self.particleStatesSeen, writes PDB
        files for the best-score, best-RMSD and final frames, and stores
        self.bestMdScore, self.bestMdRmsd, self.bestMdScoreFrame and
        self.bestMdRmsdFrame.
        """
        # open trajectory file
        outputDir = self.getParam("output_directory")
        trajectoryFile = self.getParam("md_trajectory_output_file")
        fullFile = os.path.join(outputDir, trajectoryFile)
        rh = RMF.open_rmf_file(fullFile)
        IMP.rmf.set_hierarchies(rh, [self.protein])
        framesToRead = atomicDominoUtilities.getMdIntervalFrames(rh, int(self.getParam("md_interval")), self.protein)
        print("preparing to read md frames %s" % framesToRead)

        # particleInfo: per particle, one [domino state, coordinates] entry
        # for each md step read.
        # particleStatesSeen: per particle, grid index -> domino state.
        particleInfo = {}
        particleStatesSeen = {}
        for atomName in atomList:
            # Fail early with KeyError on an unknown atom name.
            self.namesToParticles[atomName]
            particleInfo[atomName] = []
            particleStatesSeen[atomName] = {}

        self.particleStatesSeen = particleStatesSeen
        self.particleInfo = particleInfo

        # read trajectory file
        mdScoreOutputFile = os.path.join(outputDir, "%s" % self.getParam("md_score_output_file"))
        [bestMdScore, bestScoreFrame, bestRmsd, bestRmsdFrame] = self.readTrajectoryFile(atomList, rh, framesToRead, mdScoreOutputFile, 0, flexibleAtoms)

        # singlePdbResults() prepends the output directory itself, so it is
        # given the bare trajectory name; passing the joined path would
        # duplicate a relative output directory.
        self.singlePdbResults(trajectoryFile, bestScoreFrame, self.getParam("best_md_score_output_file"))
        self.singlePdbResults(trajectoryFile, bestRmsdFrame, self.getParam("best_md_rmsd_output_file"))
        self.singlePdbResults(trajectoryFile, -1, self.getParam("final_md_frame_output_file"))

        self.bestMdScore = round(bestMdScore, 2)
        self.bestMdRmsd = round(bestRmsd, 2)
        self.bestMdScoreFrame = bestScoreFrame
        self.bestMdRmsdFrame = bestRmsdFrame


    def readCgTrajectories(self, atomList, flexibleAtoms):
        """Read every CG trajectory and record the best score and RMSD.

        atomList -- forwarded to readTrajectoryFile().
        flexibleAtoms -- forwarded to the RMSD calculations.

        One CG trajectory ("cg_output_file" followed by the frame number) is
        read for each frame selected from the MD trajectory with
        "cg_interval"; only its last 20 frames are processed. Stores
        self.bestCgScore, self.bestCgRmsd, self.bestCgScoreFile and
        self.bestCgRmsdFile (the two files as paths that include the output
        directory).
        """
        cgFileName = self.getParam("cg_output_file")
        bestCgScore = 10000000
        bestCgScoreFile = ""
        bestCgRmsd = 10000000
        bestCgRmsdFile = ""
        # Names without the output directory, for singlePdbResults(), which
        # prepends the directory itself.
        bestCgScoreName = ""
        bestCgRmsdName = ""

        outputDir = self.getParam("output_directory")
        trajectoryFile = self.getParam("md_trajectory_output_file")
        fullFile = os.path.join(outputDir, trajectoryFile)
        rh = RMF.open_rmf_file(fullFile)
        IMP.rmf.set_hierarchies(rh, [self.protein])
        framesToRead = atomicDominoUtilities.getMdIntervalFrames(rh, int(self.getParam("cg_interval")), self.protein)

        skipCgDomino = int(self.getParam("skip_cg_domino"))

        if len(framesToRead) > 0:
            for cgNumber in framesToRead:
                # Open next cg trajectory
                cgName = "%s%s" % (cgFileName, cgNumber)
                fullCgFileName = os.path.join(outputDir, cgName)
                rh = RMF.open_rmf_file(fullCgFileName)
                IMP.rmf.set_hierarchies(rh, [self.protein])

                # Only look at the last 20 frames
                frameCount = IMP.rmf.get_number_of_frames(rh, self.protein)
                startFrameCount = max(frameCount - 20, 0)
                cgFrames = list(range(startFrameCount, frameCount))

                # Process trajectory
                cgScoreOutputFile = os.path.join(outputDir, "%s%s" % (self.getParam("cg_score_output_file"), cgNumber))
                [cgScore, cgScoreFrame, cgRmsd, cgRmsdFrame] = self.readTrajectoryFile(atomList, rh, cgFrames, cgScoreOutputFile, skipCgDomino, flexibleAtoms)
                print("cg number %s rmsd %s score %s" % (cgNumber, cgRmsd, cgScore))

                # Update best score
                if cgScore < bestCgScore:
                    bestCgScore = cgScore
                    bestCgScoreFile = fullCgFileName
                    bestCgScoreName = cgName
                if cgRmsd < bestCgRmsd:
                    bestCgRmsd = cgRmsd
                    bestCgRmsdFile = fullCgFileName
                    bestCgRmsdName = cgName

            # All three calls pass names relative to the output directory;
            # passing an already joined path would duplicate a relative
            # output directory inside singlePdbResults().
            self.singlePdbResults(bestCgScoreName, -1, self.getParam("best_cg_score_output_file"))
            self.singlePdbResults(bestCgRmsdName, -1, self.getParam("best_cg_rmsd_output_file"))
            self.singlePdbResults("%s%s" % (cgFileName, framesToRead[-1]), -1, self.getParam("final_cg_frame_output_file"))
            finalCgRmsd = self.calculateNativeRmsd(flexibleAtoms)
            print("final cg rmsd is %s " % finalCgRmsd)

        self.bestCgScore = round(bestCgScore, 2)
        self.bestCgRmsd = round(bestCgRmsd, 2)
        self.bestCgScoreFile = bestCgScoreFile
        self.bestCgRmsdFile = bestCgRmsdFile


    def singlePdbResults(self, trajectoryFile, frame, outputPdbFile):
        """Load one trajectory frame into self.protein and write it as a PDB.

        trajectoryFile -- RMF file name; joined onto "output_directory".
        frame -- frame index to load; -1 selects the last frame.
        outputPdbFile -- PDB file name; joined onto "output_directory".
        """
        outputDir = self.getParam("output_directory")
        trajectoryPath = os.path.join(outputDir, trajectoryFile)
        pdbPath = os.path.join(outputDir, outputPdbFile)

        rh = RMF.open_rmf_file(trajectoryPath)
        IMP.rmf.set_hierarchies(rh, [self.protein])

        frameToLoad = frame
        if frameToLoad == -1:
            frameToLoad = IMP.rmf.get_number_of_frames(rh, self.protein) - 1

        IMP.rmf.load_frame(rh, frameToLoad, self.protein)
        IMP.atom.write_pdb(self.protein, pdbPath)


    def calculateRmsd(self, otherProtein, flexibleAtoms):
        """Return the RMSD between otherProtein and this object's protein.

        otherProtein -- hierarchy whose particles are matched by name
            against self.namesToParticles.
        flexibleAtoms -- container of particle names; only particles whose
            name it contains take part in the calculation.

        Raises KeyError when a selected name is missing from
        self.namesToParticles.
        """
        otherNamesToParticles = atomicDominoUtilities.makeNamesToParticles(otherProtein)
        otherVector = []
        modelVector = []
        for pName in otherNamesToParticles.keys():
            # "in" replaces the deprecated dict.has_key() membership test.
            if pName not in flexibleAtoms:
                continue
            otherParticle = otherNamesToParticles[pName]
            modelParticle = self.namesToParticles[pName]
            otherVector.append(IMP.core.XYZ.decorate_particle(otherParticle).get_coordinates())
            modelVector.append(IMP.core.XYZ.decorate_particle(modelParticle).get_coordinates())
        return IMP.atom.get_rmsd(otherVector, modelVector)


    def calculateNativeRmsd(self, flexibleAtoms):
        """Return the RMSD between the native structure and self.protein.

        On the first call the PDB named by "native_pdb_input_file" is read
        into a new model; the model and protein are kept on
        self.nativeModel and self.nativeProtein for later calls.
        """
        nativeLoaded = self.wroteNativeProtein != 0
        if not nativeLoaded:
            pdbName = self.getParam("native_pdb_input_file")
            self.nativeModel = IMP.Model()
            self.nativeProtein = IMP.atom.read_pdb(
                pdbName, self.nativeModel, IMP.atom.ATOMPDBSelector())
            self.wroteNativeProtein = 1

        nativeRmsd = self.calculateRmsd(self.nativeProtein, flexibleAtoms)
        return nativeRmsd


    def calculateTrajectoryRmsd(self, trajectoryFile, trajectoryFrame, flexibleAtoms):
        """Return the RMSD between a trajectory frame and self.protein.

        trajectoryFile -- RMF file name; joined onto "output_directory".
        trajectoryFrame -- frame index to load; -1 selects the last frame.
        flexibleAtoms -- forwarded to calculateRmsd().

        A fresh copy of the structure in "native_pdb_input_file" is read and
        the requested frame is loaded onto it before the comparison.
        """
        pdbName = self.getParam("native_pdb_input_file")
        # Read into a dedicated model: this keeps the extra particles out of
        # self.nativeModel and does not require calculateNativeRmsd() to
        # have created that model first.
        otherModel = IMP.Model()
        otherProtein = IMP.atom.read_pdb(pdbName, otherModel, IMP.atom.ATOMPDBSelector())

        outputDir = self.getParam("output_directory")
        fullFile = os.path.join(outputDir, trajectoryFile)
        print("open calculate traj rmf %s" % fullFile)
        rh = RMF.open_rmf_file(fullFile)
        IMP.rmf.set_hierarchies(rh, [otherProtein])

        if trajectoryFrame == -1:
            trajectoryFrame = IMP.rmf.get_number_of_frames(rh, otherProtein) - 1
        IMP.rmf.load_frame(rh, trajectoryFrame, otherProtein)

        return self.calculateRmsd(otherProtein, flexibleAtoms)


    def createAllSubsetAssignments(self):
        """Create and filter the initial assignments of every leaf subset.

        For each leaf node the unique assignments found in self.particleInfo
        are filtered with a RestraintScoreSubsetFilterTable and stored in a
        ListAssignmentsTable. The tables are kept as self.lat and self.rssft.
        """
        lat = IMP.domino.ListAssignmentsTable()
        rssft = IMP.domino.RestraintScoreSubsetFilterTable(self.model.get_root_restraint_set(), self.dominoPst)

        for nodeIndex in self.getLeafNodeIndexList():
            # get subset for this leaf
            subset = self.mt.get_vertex_name(nodeIndex)
            particleNameList = [self.quickParticleName(particle) for particle in subset]
            print("creating initial assignments for leaf %s" % nodeIndex)

            # use particleInfo to create assignments and filter them
            assignments = self.createUniqueLeafAssignments(particleNameList, self.particleInfo)
            filteredAssignments = self.filterAssignments(assignments, subset, nodeIndex, rssft)

            # add assignments to container and listAssignmentTable
            container = IMP.domino.PackedAssignmentContainer()
            for assignment in filteredAssignments:
                container.add_assignment(assignment)
            lat.set_assignments(subset, container)

        self.lat = lat
        self.rssft = rssft


    def runDomino(self):
        """Load assignments from the last merge tree vertex downwards.

        The container returned by loadAssignments() for that vertex is
        stored as self.completeAc.
        """
        rootIndex = self.mt.get_vertices()[-1]
        self.completeAc = self.loadAssignments(rootIndex)


    def loadAssignments(self, nodeIndex):
        """Recursively load the assignments of a merge tree node.

        Leaves are loaded directly by the sampler; for an inner node the two
        children are loaded first (lower index first) and then merged.
        Adds the node's assignment count to self.totalAssignments, logs
        memory use, and returns the node's HeapAssignmentContainer.
        """
        children = self.mt.get_out_neighbors(nodeIndex)
        subset = self.mt.get_vertex_name(nodeIndex)
        heapCount = int(self.getParam("heap_count"))
        nodeFilter = self.rssft.get_subset_filter(subset, [])
        mine = IMP.domino.HeapAssignmentContainer(heapCount, nodeFilter)

        if len(children) != 0:
            firstChild, secondChild = children[0], children[1]
            if firstChild > secondChild:
                firstChild, secondChild = secondChild, firstChild

            # recurse on the two children
            firstAc = self.loadAssignments(firstChild)
            secondAc = self.loadAssignments(secondChild)

            # Time only the merge itself, not the recursion above.
            self.logTimePoint(1)
            self.sampler.load_vertex_assignments(nodeIndex, firstAc, secondAc, mine)
            timeDifference = self.logTimePoint(0)
            print("Done Parent %s Assignments %s first child %s second child %s time %s" % (
                nodeIndex, mine.get_number_of_assignments(),
                firstAc.get_number_of_assignments(),
                secondAc.get_number_of_assignments(), timeDifference))
        else:
            print("computing assignments for leaf %s" % nodeIndex)
            self.sampler.load_vertex_assignments(nodeIndex, mine)
            print("leaf node %s has %s leaf assignments" % (nodeIndex, mine.get_number_of_assignments()))

        self.totalAssignments += mine.get_number_of_assignments()
        self.logMemory()
        return mine


    def writeOutput(self, flexibleAtoms, startTime):
        """Select the best final assignment, minimize it and print results.

        flexibleAtoms -- forwarded to the RMSD calculations.
        startTime -- start of the run as a time.time() value; used for the
            reported run time.

        Reads self.completeAc, the self.bestMd* and self.bestCg* values and
        self.totalAssignments / self.maxMem, which must already be set.
        Writes the minimized structure to the "minimum_domino_score_pdb"
        parameter inside "output_directory".
        """
        bestAssignment = 0
        bestDominoScore = 100000       # value reported; rounded once a score is seen
        lowestScore = bestDominoScore  # unrounded value used for comparisons

        for assignment in self.completeAc.get_assignments():
            IMP.domino.load_particle_states(self.dominoPst.get_subset(), assignment, self.dominoPst)
            score = self.model.evaluate(False)
            # Compare against the unrounded minimum: comparing against the
            # rounded value could let a slightly worse score replace the best.
            if score < lowestScore:
                lowestScore = score
                bestAssignment = assignment
                bestDominoScore = round(score, 2)

        print("best domino score is %s " % bestDominoScore)
        print("best md score is %s" % self.bestMdScore)
        print("best md rmsd is %s" % self.bestMdRmsd)
        print("best cg score is %s" % self.bestCgScore)
        print("best cg rmsd is %s" % self.bestCgRmsd)
        print("merge tree contained %s total assignments" % self.totalAssignments)

        IMP.domino.load_particle_states(self.dominoPst.get_subset(), bestAssignment, self.dominoPst)
        # Compared with the best MD frame before minimization moves the atoms.
        dominoVsMdRmsd = round(self.calculateTrajectoryRmsd(self.getParam("md_trajectory_output_file"), self.bestMdScoreFrame, flexibleAtoms), 2)

        cg = IMP.core.ConjugateGradients(self.model)
        cg.optimize(100)
        IMP.atom.write_pdb(self.protein, os.path.join(self.getParam("output_directory"), self.getParam("minimum_domino_score_pdb")))

        dominoVsCgRmsd = round(self.calculateTrajectoryRmsd(self.bestCgScoreFile, -1, flexibleAtoms), 2)
        dominoMinimizedScore = round(self.model.evaluate(False), 2)
        dominoRmsd = round(self.calculateNativeRmsd(flexibleAtoms), 2)

        runTime = round(time.time() - startTime, 2)
        print("final domino score (after cg): %s" % dominoMinimizedScore)
        print("final domino rmsd: %s" % dominoRmsd)
        print("best domino rmsd with best md score: %s" % dominoVsMdRmsd)
        print("domino rmsd with best cg score: %s" % dominoVsCgRmsd)
        print("Final Results\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (self.bestMdScore, self.bestMdRmsd, self.bestCgScore, self.bestCgRmsd, bestDominoScore, dominoRmsd, dominoMinimizedScore, dominoVsCgRmsd, self.totalAssignments, self.maxMem, runTime))


    ####################

    # Parallel methods

    ####################

    def createSubsetFromParticles(self, particleNames):
        """Build a domino Subset from a space-separated list of particle names.

        Returns [subset, particleList], where particleList holds the
        particles in the order the names were given.
        """
        particleList = [self.namesToParticles[particleName]
                        for particleName in particleNames.split(" ")]
        return [IMP.domino.Subset(particleList), particleList]


    def createHdf5AssignmentContainer(self, index, particleNames, read):
        """Create an assignments container backed by the data set for index.

        index -- node index; str(index) names the child data set.
        particleNames -- space-separated particle names defining the subset
            and the particle order.
        read -- when 1 the existing child data set is fetched, otherwise a
            new one is added.

        Returns [subset, container]. Progress and the particle order are
        printed along the way.
        """
        root = self.getAssignmentContainerRoot(index, read)
        print("got root for index %s" % index)

        datasetName = str(index)
        if read == 1:
            dataset = root.get_child_index_data_set_2d(datasetName)
        else:
            dataset = root.add_child_index_data_set_2d(datasetName)
        print("added child index dataset")

        subset, particleOrder = self.createSubsetFromParticles(particleNames)
        print("created subset for index %s" % index)

        hdf5Ac = IMP.domino.create_assignments_container(dataset, subset, particleOrder)
        print("returning from create")

        for nextInt in IMP.domino.get_order(subset, particleOrder):
            print("next int is %s" % nextInt)
        return [subset, hdf5Ac]


    def loadAssignmentsParallel(self, nodeIndex, particleInfo, mtIndexToNodeInfo, mtIndexToSubsetOrder, mtIndexToParticles):
        """Build the assignments for one merge tree node and return its subset order.

        A node whose entry in mtIndexToNodeInfo has no "firstChild" key is
        treated as a leaf and delegated to createAssignmentsParallel.
        Otherwise the containers of both children are obtained through
        createHdf5AssignmentContainer (third argument 1; presumably "read
        existing" -- confirm against that method), merged with
        IMP.domino.load_merged_assignments under a restraint score filter
        table, and stored in a container created for this node.

        Returns the space-separated particle names of the node's subset, as
        produced by getSubsetOrderList (or by createAssignmentsParallel for
        a leaf).
        """
        IMP.base.set_log_level(IMP.WARNING)

        nodeInfo = mtIndexToNodeInfo[nodeIndex]
        if "firstChild" not in nodeInfo:
            # Leaf node: nothing to merge.
            print("writing file for leaf index %s" % nodeIndex)
            return self.createAssignmentsParallel(particleInfo, nodeIndex, mtIndexToParticles)

        # Timing points are taken in a fixed sequence; do not reorder them.
        beginTime = self.logTimePoint(1)

        firstChildIndex = nodeInfo["firstChild"]
        firstChildParticles = mtIndexToSubsetOrder[firstChildIndex]
        [firstSubset, firstAc] = self.createHdf5AssignmentContainer(firstChildIndex, firstChildParticles, 1)
        firstAcCreateTime = self.logTimePoint(0)

        secondChildIndex = nodeInfo["secondChild"]
        secondChildParticles = mtIndexToSubsetOrder[secondChildIndex]
        [secondSubset, secondAc] = self.createHdf5AssignmentContainer(secondChildIndex, secondChildParticles, 1)
        secondAcCreateTime = self.logTimePoint(0)

        print("getting assignments for nodeIndex %s first child %s second child %s" % (nodeIndex, firstChildIndex, secondChildIndex))
        myParticles = mtIndexToParticles[nodeIndex]
        print("first child particles %s\nsecond child particles %s\nmy particles; %s\n" % (firstChildParticles, secondChildParticles, myParticles))

        # Diagnostic dump of what each child contributes.
        for member in firstSubset:
            print("next particle in first subset: %s" % self.quickParticleName(member))
        for member in secondSubset:
            print("next particle in second subset: %s" % self.quickParticleName(member))
        for childAssignment in firstAc.get_assignments():
            print("next assignment for first child %s: %s" % (firstChildIndex, childAssignment))
        for childAssignment in secondAc.get_assignments():
            print("next assignment for second child %s: %s" % (secondChildIndex, childAssignment))

        [mySubset, myAc] = self.createHdf5AssignmentContainer(nodeIndex, myParticles, 0)
        print("done creating hdf5")
        prepTime = self.logTimePoint(0)

        rssft = IMP.domino.RestraintScoreSubsetFilterTable(self.model.get_root_restraint_set(), self.dominoPst)
        # The filter itself is not used below; the call is retained as in the
        # original in case it has side effects inside IMP.
        rssf = rssft.get_subset_filter(mySubset, [])

        IMP.domino.load_merged_assignments(firstSubset, firstAc, secondSubset, secondAc, [rssft], myAc)
        heapTime = self.logTimePoint(0)
        addTime = self.logTimePoint(0)

        for mergedAssignment in myAc.get_assignments():
            print("loadAssignmentsParallel next assignment for %s: %s" % (nodeIndex, mergedAssignment))
        doneTime = self.logTimePoint(0)

        firstChildCount = firstAc.get_number_of_assignments()
        secondChildCount = secondAc.get_number_of_assignments()
        print("first count: %s second count: %s begin: %s firstAc: %s secondAc: %s  prep: %s heap: %s add: %s done: %s" % (firstChildCount, secondChildCount, beginTime, firstAcCreateTime, secondAcCreateTime,  prepTime, heapTime, addTime, doneTime))

        return self.getSubsetOrderList(mySubset)


    def createAssignmentsParallel(self, particleInfo, nodeIndex, mtIndexToParticles):
        """Create, filter and store the assignments of a leaf merge tree node.

        mtIndexToParticles[nodeIndex] is the space-separated particle-name
        string of the leaf's subset. Unique leaf assignments are built from
        particleInfo with createUniqueLeafAssignments, filtered through
        filterAssignments with a restraint score filter table, and written
        to a new HDF5 data set named after nodeIndex.

        Returns the space-separated particle names of the subset, as
        produced by getSubsetOrderList.
        """
        subsetName = mtIndexToParticles[nodeIndex]
        print("starting assignments parallel leaf index %s subset name %s" % (nodeIndex, subsetName))

        [subset, particleList] = self.createSubsetFromParticles(subsetName)
        particleNameList = subsetName.split(" ")

        # Create assignments by reading states.
        finalAssignments = self.createUniqueLeafAssignments(particleNameList, particleInfo)

        rssft = IMP.domino.RestraintScoreSubsetFilterTable(self.model.get_root_restraint_set(), self.dominoPst)
        filteredAssignments = self.filterAssignments(finalAssignments, subset, nodeIndex, rssft)

        root = self.getAssignmentContainerRoot(nodeIndex, 0)
        dataset = root.add_child_index_data_set_2d(str(nodeIndex))
        dataset.set_size([0, len(subset)])

        # The particle list returned by createSubsetFromParticles is the
        # ordering handed to the container; the original referenced an
        # undefined name (particleOrder) here and raised NameError.
        hdf5AssignmentContainer = IMP.domino.create_assignments_container(dataset, subset, particleList)
        for assignment in filteredAssignments:
            hdf5AssignmentContainer.add_assignment(assignment)

        for assignment in hdf5AssignmentContainer.get_assignments():
            print("hdf5 assignment container node %s next assignmet %s" % (nodeIndex, assignment))

        subsetOrder = self.getSubsetOrderList(subset)
        print("leaf node returning with order %s" % subsetOrder)
        return subsetOrder


    #Write pymol session files for the interactions across atoms and all subsets

    def writePymolData(self):
        """Write the interaction graph and subset graph geometry as Pymol files.

        Both files go to the "output_directory" parameter; their names come
        from "pymol_interactions_file" (geometry of self.ig) and
        "pymol_subsets_file" (geometry of self.jt).
        """
        outputDir = self.getParam("output_directory")

        # Interaction graph.
        shapes = IMP.domino.get_interaction_graph_geometry(self.ig)
        interactionsName = self.getParam("pymol_interactions_file")
        writer = IMP.display.PymolWriter(os.path.join(outputDir, interactionsName))
        for shape in shapes:
            writer.add_geometry(shape)

        # Subset graph; the same local is rebound so the first writer is
        # released at the same point as before.
        subsetsName = self.getParam("pymol_subsets_file")
        shapes = IMP.domino.get_subset_graph_geometry(self.jt)
        writer = IMP.display.PymolWriter(os.path.join(outputDir, subsetsName))
        for shape in shapes:
            writer.add_geometry(shape)


    #Clean the default name of the vertex (in brackets and with each atom contained in quotes) and return a string where [] and " are removed

    def cleanVertexName(self, vertexName):
        """Return the contents of the first [...] group in vertexName, without quotes.

        vertexName is converted with str(). The text between the first "["
        and the following "]" (trailing whitespace before "]" dropped) is
        returned with every double quote removed. A "Subset([...])" prefix
        needs no dedicated pattern: the bracket search finds the same text.

        If the name contains no bracketed group, the whole string is
        returned with quotes removed (previously this case raised
        AttributeError because the placeholder value was the integer 0).
        """
        text = str(vertexName)
        bracketed = re.search(r'\[(.*?)\s*\]', text)
        if bracketed:
            text = bracketed.group(1)
        return text.replace('"', '')


    def getSubsetOrderList(self, subset):

        subsetOrderList = []

        for particle in subset:

            name = self.quickParticleName(particle)

            subsetOrderList.append(name)

        subsetOrder = " ".join(subsetOrderList)

        return subsetOrder


    def checkAssignments(self, subset, nodeIndex, particleOrder):
        """Print every assignment stored for nodeIndex (debugging aid).

        Opens the node's assignment file through getAssignmentContainerRoot
        with read=1, wraps its data set in an assignments container for
        subset / particleOrder, and prints each assignment found.
        """
        print("reading back in assignments for leaf index %s" % nodeIndex)
        storedDataset = self.getAssignmentContainerRoot(nodeIndex, 1).get_child_index_data_set_2d(str(nodeIndex))
        container = IMP.domino.create_assignments_container(storedDataset, subset, particleOrder)
        for stored in container.get_assignments():
            print("leaf index %s read back in %s" % (nodeIndex, stored))


    def createSamplerLite(self):
        """Create a DominoSampler for self.model / self.dominoPst and keep it in self.sampler.

        Cross subset filtering is switched on when the
        "cross_subset_filtering" parameter equals 1.
        """
        sampler = IMP.domino.DominoSampler(self.model, self.dominoPst)
        useCrossFiltering = (self.getParam("cross_subset_filtering") == 1)
        if useCrossFiltering:
            sampler.set_use_cross_subset_filtering(1)
        self.sampler = sampler


    def createSiblingMap(self, parentIndex):


        children = self.mt.get_out_neighbors(parentIndex)

        if (len(children) > 0):

            firstChild = children[0]

            secondChild = children[1]

            self.parentSiblingMap[firstChild] = {}

            self.parentSiblingMap[firstChild]["sibling"] = secondChild

            self.parentSiblingMap[firstChild]["parent"] = parentIndex


            self.parentSiblingMap[secondChild] = {}

            self.parentSiblingMap[secondChild]["sibling"] = firstChild

            self.parentSiblingMap[secondChild]["parent"] = parentIndex

            print "created map for parent %s first child %s second child %s" % (parentIndex, firstChild, secondChild)


            self.parentSiblingMap[parentIndex]["firstChild"] = firstChild

            self.parentSiblingMap[parentIndex]["secondChild"] = secondChild


            self.createSiblingMap(firstChild)

            self.createSiblingMap(secondChild)


    def getMtIndexToNodeInfo(self):

        return self.parentSiblingMap


    def getLeafParentNodeIndexList(self):

        leafParentNodeIndexList = {}

        leafNodeIndexList = self.getLeafNodeIndexList()

        for leafIndex in leafNodeIndexList:

            parent = self.parentSiblingMap[leafIndex]["parent"]

            leafParentNodeIndexList[parent] = 1

        return leafParentNodeIndexList


    def getMtIndexToNameList(selt):

        mtIndexToNames = {}

        for index in self.mt.get_vertices():

            name = self.mt.get_vertex_name(index)

            mtIndexToNames[index] = name

        return mtIndexToNames


    def getAssignmentContainerRoot(self, subsetIndex, read):
        """Open or create the HDF5 assignment file of a subset and return its root.

        The file lives in the "output_directory" parameter and is named by
        the "subset_assignment_output_file" prefix followed by subsetIndex.
        It is opened with RMF.open_hdf5_file when read equals 1 and created
        with RMF.create_hdf5_file for any other value.
        """
        outputDir = self.getParam("output_directory")
        filePrefix = self.getParam("subset_assignment_output_file")
        baseName = "%s%s" % (filePrefix, subsetIndex)
        assignmentFileName = os.path.join(outputDir, baseName)
        # The message says "creating" in both modes.
        print("creating hdf5 file with name %s" % assignmentFileName)

        if read == 1:
            return RMF.open_hdf5_file(assignmentFileName)
        return RMF.create_hdf5_file(assignmentFileName)


    ##########

    #Begin Cytoscape Methods

    ##########


    def writeVisualization(self):


        self.writeCytoscapeIgInput()

        self.writeCytoscapeJtInput()

        self.writeCytoscapeMtInput()

        self.writeCytoscapeScripts()

        self.writePymolData()


    def writeCytoscapeScripts(self):

        outputDir = self.getParam("output_directory")

        mTreeCytoscapeInput = self.getParam("mtree_cytoscape_input_file")

        mTreeCytoscapeAssignments = self.getParam("mtree_cytoscape_assignment_file")

        mTreeCytoscapeAtomChains = self.getParam("mtree_cytoscape_atom_chain_file")

        mTreeCytoscapeAtomSummary = self.getParam("mtree_cytoscape_atom_summary_file")


        mTreeCytoscapeScript = self.getParam("mtree_cytoscape_script")

        mTreeFh = open(os.path.join(outputDir, mTreeCytoscapeScript), 'w')

        mTreeFh.write("network import file=%s\n" % os.path.join(outputDir, mTreeCytoscapeInput))

        mTreeFh.write("node import attributes file=\"%s\"\n" % os.path.join(outputDir, mTreeCytoscapeAssignments))

        mTreeFh.write("node import attributes file=\"%s\"\n" % os.path.join(outputDir, mTreeCytoscapeAtomSummary))

        mTreeFh.write("node import attributes file=\"%s\"\n" % os.path.join(outputDir, mTreeCytoscapeAtomChains))

        mTreeFh.write("layout jgraph-tree\n")


    def getGraphStructure(self, graph, fileName, separator):
        """Dump graph to fileName and parse its nodes and edges back out.

        graph.show() is written to fileName, which is then read line by
        line. Node lines look like  <number>[label="[<names>]"  and edge
        lines like  <number>-<separator><number>  (separator is "-" or ">"
        for the callers in this class).

        Returns [nodesToNames, nodesToNodes]:
          nodesToNames -- node number (string) -> label text without the
                          surrounding brackets
          nodesToNodes -- source node number -> dict whose keys are the
                          target node numbers (values are always 1)

        Both file handles are managed by with-blocks; the original left the
        read handle open. The separator is escaped with re.escape so that
        any literal separator is matched verbatim.
        """
        # Write the graph to file.
        with open(fileName, 'w') as graphLogWrite:
            graph.show(graphLogWrite)

        nodeRe = re.compile(r'^(\d+)\[label="\[*(.*?)\s*\]*"')
        edgeRe = re.compile(r'^(\d+)-%s(\d+)' % re.escape(separator))

        nodesToNodes = {}  # keys: source node, value: dict of target nodes (track edges)
        nodesToNames = {}  # keys: node number, value: string parsed from file

        # Read the file back.
        with open(fileName, 'r') as graphLogRead:
            for line in graphLogRead:
                # A line is either a node or an edge, never both.
                node = nodeRe.search(line)
                if node:
                    nodesToNames[node.group(1)] = node.group(2)
                    continue

                edge = edgeRe.search(line)
                if edge:
                    nodesToNodes.setdefault(edge.group(1), {})[edge.group(2)] = 1

        return [nodesToNames, nodesToNodes]


    def writeEdgeFile(self, nodesToNodes, edgeFileName):

        #write edge file

        outputDir = self.getParam("output_directory")

        graphInputFile = open(os.path.join(outputDir, edgeFileName), 'w')

        for firstNode in nodesToNodes.keys():

            nodeDict = nodesToNodes[firstNode]

            for secondNode in nodeDict.keys():

                graphInputFile.write("%s ttt %s\n" % (firstNode, secondNode))

        graphInputFile.close()


    def writeCytoscapeIgInput(self):

        outputDir = self.getParam("output_directory")

        igOutputFile = self.getParam("ig_output_file")

        [nodesToNames, nodesToNodes] = self.getGraphStructure(self.ig, os.path.join(outputDir, igOutputFile), "-")


        self.writeEdgeFile(nodesToNodes, self.getParam("ig_cytoscape_input_file"))


        #write residue numbers for each node

        igResiduesFile = self.getParam("ig_cytoscape_residues_file")

        peptideChain = self.getParam("peptide_chain")


        subsetResidueFile = open(os.path.join(outputDir, igResiduesFile), 'w')

        subsetResidueFile.write("ResidueNumber\n")

        for nodeNumber in nodesToNames.keys():

            nodeName = nodesToNames[nodeNumber]


            [nodeChain, residueNumber, nodeAtom] = atomicDominoUtilities.getAtomInfoFromName(nodeName)

            if (nodeChain == peptideChain): #peptideAtom

                subsetResidueFile.write("%s = %s\n" % (nodeNumber, residueNumber))

            else:

                subsetResidueFile.write("%s = 100\n" % nodeNumber)  #for now just write arbitrary number to designate as protein atom


    def writeCytoscapeMtInput(self):

        outputDir = self.getParam("output_directory")

        mTreeOutputFile = self.getParam("mtree_output_file")

        [nodesToNames, nodesToNodes] = self.getGraphStructure(self.mt, os.path.join(outputDir, mTreeOutputFile), ">")


        self.writeEdgeFile(nodesToNodes, self.getParam("mtree_cytoscape_input_file"))

        self.writeNodeNameAttributes(nodesToNames, self.getParam("mtree_cytoscape_atom_name_file"), self.getParam("mtree_cytoscape_atom_summary_file"),

                                     self.getParam("mtree_cytoscape_atom_chain_file"))


    def writeCytoscapeJtInput(self):


        outputDir = self.getParam("output_directory")

        jTreeOutputFile = self.getParam("jtree_output_file")

        [nodesToNames, nodesToNodes] = self.getGraphStructure(self.jt, os.path.join(outputDir, jTreeOutputFile), "-")


        self.writeEdgeFile(nodesToNodes, self.getParam("jtree_cytoscape_input_file"))


        self.writeNodeNameAttributes(nodesToNames, self.getParam("jtree_cytoscape_atom_name_file"), self.getParam("jtree_cytoscape_atom_summary_file"),

                                     self.getParam("jtree_cytoscape_atom_chain_file"))


        #write edge weight file -- weights are number of shared particles across nodes

        jtreeCytoscapeEdgeFile = self.getParam("jtree_cytoscape_edge_file")

        edgeWeightFile = open(os.path.join(outputDir, jtreeCytoscapeEdgeFile), 'w')

        edgeWeightFile.write("SubsetOverlap (class=Integer)\n")

        for firstNode in nodesToNodes.keys():

            nodeDict = nodesToNodes[firstNode]

            for secondNode in nodeDict.keys():

                firstNodeAtoms = nodesToNames[firstNode].split(" ")

                secondNodeAtoms = nodesToNames[secondNode].split(" ")

                intersection = [val for val in firstNodeAtoms if val in secondNodeAtoms]

                edgeWeightFile.write("%s (pp) %s = %s\n" % (firstNode, secondNode, len(intersection)))

        edgeWeightFile.close()


    def getAtomTypeCounts(self, atomNames):
        """Count how many atoms in atomNames are on the peptide chain and how many are not.

        atomNames is a space-separated string of atom names, optionally
        wrapped in square brackets. Each name is resolved with
        atomicDominoUtilities.getAtomInfoFromName and its chain is compared
        with the "peptide_chain" parameter.

        Returns [peptideAtomCount, proteinAtomCount].
        """
        stripped = atomNames.lstrip('[').rstrip(']').rstrip()

        counts = {True: 0, False: 0}  # keyed by "is on the peptide chain"
        for atomName in stripped.split(" "):
            chain = atomicDominoUtilities.getAtomInfoFromName(atomName)[0]
            counts[chain == self.getParam("peptide_chain")] += 1
        return [counts[True], counts[False]]


    def writeNodeNameAttributes(self, nodesToNames, atomNameFile, atomSummaryFile, atomChainFile):

        #write attribute file (atom names for each node)

        outputDir = self.getParam("output_directory")

        subsetAtomNameFile = open(os.path.join(outputDir, atomNameFile), 'w')

        subsetAtomSummaryFile = open(os.path.join(outputDir, atomSummaryFile), 'w')

        subsetAtomChainFile = open(os.path.join(outputDir, atomChainFile), 'w')


        subsetAtomNameFile.write("Atom names\n")

        subsetAtomSummaryFile.write("Atom Summary\n")

        subsetAtomChainFile.write("Atom chain\n")

        for node in nodesToNames.keys():

            atomNames =  nodesToNames[node]

            subsetAtomNameFile.write("%s = %s\n" % (node, atomNames))

            [peptideAtomCount, proteinAtomCount] = self.getAtomTypeCounts(atomNames)

            #Number of protein and peptide atoms in each subset

            subsetAtomSummaryFile.write("%s = %sp %sl\n" % (node, proteinAtomCount, peptideAtomCount))


            #whether each subset has protein, peptide, or a mix of atoms

            if (proteinAtomCount == 0):

                subsetAtomChainFile.write("%s = 1\n" % node)

            elif (peptideAtomCount == 0):

                subsetAtomChainFile.write("%s = 2\n" % node)

            else:

                subsetAtomChainFile.write("%s = 3\n" % node)


        subsetAtomChainFile.close()

        subsetAtomSummaryFile.close()

        subsetAtomNameFile.close()


    ##########

    #End Cytoscape Methods

    ##########