doc/html/macros_8py_source.html

 """@namespace IMP.pmi.macros

 Protocols for sampling structures and analyzing them.

 """


 import IMP.pmi.representation

 import IMP.pmi.tools

 import IMP.pmi.samplers

 import IMP.pmi.output

 import IMP.pmi.analysis

 import IMP.pmi.io.input

 import IMP.rmf

 import RMF

 import os

 import glob

 from operator import itemgetter

 from collections import defaultdict

 import numpy as np


 class ReplicaExchange0(object):
     """A macro to help setup and run replica exchange.

     Supports monte carlo and molecular dynamics.
     Produces trajectory RMF files, best PDB structures,
     and output stat files.

     @param model                    The IMP model
     @param representation           PMI.Representation() (or list of them, for multi-state modeling)
     @param root_hier                Instead of passing Representation, just pass a hierarchy
     @param sample_objects           DEPRECATED: extra objects appended to the MC sample objects
     @param monte_carlo_sample_objects Objects for MC sampling
     @param molecular_dynamics_sample_objects Objects for MD sampling
     @param output_objects           Objects with get_output() for packing into stat files
                                     (the samplers, the replica exchange object and a
                                     stopwatch are appended to this list by execute_macro())
     @param crosslink_restraints     Harmonic restraints to go in output RMF files
     @param monte_carlo_temperature  MC temp (also handed to the MD sampler)
     @param simulated_annealing      If True, call set_simulated_annealing() on each sampler
     @param simulated_annealing_minimum_temperature Low temp for simulated annealing
     @param simulated_annealing_maximum_temperature High temp for simulated annealing
     @param simulated_annealing_minimum_temperature_nframes Frame count passed along with the low temp
     @param simulated_annealing_maximum_temperature_nframes Frame count passed along with the high temp
     @param replica_exchange_minimum_temperature Low temp for REX
     @param replica_exchange_maximum_temperature High temp for REX
     @param num_sample_rounds        Number of rounds of MC/MD per cycle
     @param number_of_best_scoring_models Number of top-scoring PDB models to keep around
     @param monte_carlo_steps        Number of MC steps per round
     @param molecular_dynamics_steps  Number of MD steps per round
     @param number_of_frames         Number of REX frames to run
     @param nframes_write_coordinates How often to write the coordinates of a frame
     @param write_initial_rmf        Write the initial configuration
     @param initial_rmf_name_suffix  File name stem of the initial RMF file
     @param stat_file_name_suffix    File name stem of the low temperature stat file
     @param best_pdb_name_suffix     File name stem of the best scoring PDB files
     @param do_clean_first           Stored in the variables table; currently a no-op
     @param do_create_directories    Create the output directories before running
     @param global_output_directory  Directory under which all output is written
     @param rmf_dir                  Sub-directory for the production RMF files
     @param best_pdb_dir             Sub-directory for the best scoring PDB files
     @param replica_stat_file_suffix File name stem of the per-replica stat file
     @param em_object_for_rmf        Object whose get_density_as_hierarchy() is added to the RMF files
     @param atomistic                Passed to IMP.pmi.output.Output
     @param replica_exchange_object  Existing replica exchange object to reuse
     """

     def __init__(self, model,
                  representation=None,
                  root_hier=None,
                  sample_objects=None,  # DEPRECATED
                  monte_carlo_sample_objects=None,
                  molecular_dynamics_sample_objects=None,
                  output_objects=None,
                  crosslink_restraints=None,
                  monte_carlo_temperature=1.0,
                  simulated_annealing=False,
                  simulated_annealing_minimum_temperature=1.0,
                  simulated_annealing_maximum_temperature=2.5,
                  simulated_annealing_minimum_temperature_nframes=100,
                  simulated_annealing_maximum_temperature_nframes=100,
                  replica_exchange_minimum_temperature=1.0,
                  replica_exchange_maximum_temperature=2.5,
                  num_sample_rounds=1,
                  number_of_best_scoring_models=500,
                  monte_carlo_steps=10,
                  molecular_dynamics_steps=10,
                  number_of_frames=1000,
                  nframes_write_coordinates=1,
                  write_initial_rmf=True,
                  initial_rmf_name_suffix="initial",
                  stat_file_name_suffix="stat",
                  best_pdb_name_suffix="model",
                  do_clean_first=True,
                  do_create_directories=True,
                  global_output_directory="./",
                  rmf_dir="rmfs/",
                  best_pdb_dir="pdbs/",
                  replica_stat_file_suffix="stat_replica",
                  em_object_for_rmf=None,
                  atomistic=False,
                  replica_exchange_object=None):
         """Store the settings; see the class documentation for the parameters.

         Raises ValueError if neither representation nor root_hier is given.
         """
         self.model = model
         self.vars = {}

         # Work out whether we are modeling a single state or several.
         if representation:
             if isinstance(representation, list):
                 self.is_multi_state = True
                 self.root_hiers = [r.prot for r in representation]
                 self.vars["number_of_states"] = len(representation)
             else:
                 self.is_multi_state = False
                 self.root_hier = representation.prot
                 self.vars["number_of_states"] = 1
         elif root_hier:
             states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
             self.vars["number_of_states"] = len(states)
             if len(states) > 1:
                 self.root_hiers = states
                 self.is_multi_state = True
             else:
                 self.root_hier = root_hier
                 self.is_multi_state = False
         else:
             # Fail loudly instead of returning a half-initialized object
             # that would only break later inside execute_macro().
             raise ValueError("Must provide representation or root_hier")

         self.crosslink_restraints = crosslink_restraints
         self.em_object_for_rmf = em_object_for_rmf

         # Merge the deprecated sample_objects into a *new* list: this works
         # when monte_carlo_sample_objects is None and never extends the
         # caller's own list in place.
         if sample_objects is not None:
             monte_carlo_sample_objects = (
                 list(monte_carlo_sample_objects or []) + list(sample_objects))
         self.monte_carlo_sample_objects = monte_carlo_sample_objects
         self.molecular_dynamics_sample_objects = \
             molecular_dynamics_sample_objects

         # execute_macro() appends to this list, so it must never be None.
         self.output_objects = [] if output_objects is None else output_objects
         self.replica_exchange_object = replica_exchange_object

         self.vars["monte_carlo_temperature"] = monte_carlo_temperature
         self.vars["replica_exchange_minimum_temperature"] = \
             replica_exchange_minimum_temperature
         self.vars["replica_exchange_maximum_temperature"] = \
             replica_exchange_maximum_temperature

         self.vars["simulated_annealing"] = simulated_annealing
         self.vars["simulated_annealing_minimum_temperature"] = \
             simulated_annealing_minimum_temperature
         self.vars["simulated_annealing_maximum_temperature"] = \
             simulated_annealing_maximum_temperature
         self.vars["simulated_annealing_minimum_temperature_nframes"] = \
             simulated_annealing_minimum_temperature_nframes
         self.vars["simulated_annealing_maximum_temperature_nframes"] = \
             simulated_annealing_maximum_temperature_nframes

         self.vars["num_sample_rounds"] = num_sample_rounds
         self.vars["number_of_best_scoring_models"] = \
             number_of_best_scoring_models
         self.vars["monte_carlo_steps"] = monte_carlo_steps
         self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
         self.vars["number_of_frames"] = number_of_frames
         self.vars["nframes_write_coordinates"] = nframes_write_coordinates
         self.vars["write_initial_rmf"] = write_initial_rmf
         self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
         self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
         self.vars["stat_file_name_suffix"] = stat_file_name_suffix
         self.vars["do_clean_first"] = do_clean_first
         self.vars["do_create_directories"] = do_create_directories
         self.vars["global_output_directory"] = global_output_directory
         self.vars["rmf_dir"] = rmf_dir
         self.vars["best_pdb_dir"] = best_pdb_dir
         self.vars["atomistic"] = atomistic
         self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix

     def show_info(self):
         """Print a short description of the outputs and all stored variables."""
         print("ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
         print("--- it stores the best scoring pdb models in pdbs/")
         print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
         print("--- variables:")
         for key in sorted(self.vars):
             # Single-argument form prints the same text under Python 2 and 3.
             print("------ %s %s" % (key.ljust(30), self.vars[key]))

     def get_replica_exchange_object(self):
         """Return the replica exchange object (set by execute_macro() if not given)."""
         return self.replica_exchange_object

     @staticmethod
     def _make_directory(path):
         """Create path (and parents); an already existing directory is fine.

         Several replicas may race to create the same directory, so an
         OSError is ignored only when the directory exists afterwards.
         """
         try:
             os.makedirs(path)
         except OSError:
             if not os.path.isdir(path):
                 raise

     def _set_simulated_annealing(self, sampler):
         """Turn on simulated annealing for sampler if it was requested."""
         if self.vars["simulated_annealing"]:
             sampler.set_simulated_annealing(
                 self.vars["simulated_annealing_minimum_temperature"],
                 self.vars["simulated_annealing_maximum_temperature"],
                 self.vars["simulated_annealing_minimum_temperature_nframes"],
                 self.vars["simulated_annealing_maximum_temperature_nframes"])

     def execute_macro(self):
         """Set up samplers and output files, then run the replica exchange loop."""
         temp_index_factor = 100000.0
         samplers = []
         sampler_mc = None
         sampler_md = None

         if self.monte_carlo_sample_objects is not None:
             print("Setting up MonteCarlo")
             sampler_mc = IMP.pmi.samplers.MonteCarlo(
                 self.model,
                 self.monte_carlo_sample_objects,
                 self.vars["monte_carlo_temperature"])
             self._set_simulated_annealing(sampler_mc)
             self.output_objects.append(sampler_mc)
             samplers.append(sampler_mc)

         if self.molecular_dynamics_sample_objects is not None:
             print("Setting up MolecularDynamics")
             sampler_md = IMP.pmi.samplers.MolecularDynamics(
                 self.model,
                 self.molecular_dynamics_sample_objects,
                 self.vars["monte_carlo_temperature"])
             self._set_simulated_annealing(sampler_md)
             self.output_objects.append(sampler_md)
             samplers.append(sampler_md)

         # ---------------------------------------------------------------------

         print("Setting up ReplicaExchange")
         rex = IMP.pmi.samplers.ReplicaExchange(
             self.model,
             self.vars["replica_exchange_minimum_temperature"],
             self.vars["replica_exchange_maximum_temperature"],
             samplers,
             replica_exchange_object=self.replica_exchange_object)
         self.replica_exchange_object = rex.rem

         myindex = rex.get_my_index()
         self.output_objects.append(rex)

         # must reset the minimum temperature due to the
         # different binary length of rem.get_my_parameter double and python
         # float
         min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)

         # ---------------------------------------------------------------------

         globaldir = self.vars["global_output_directory"] + "/"
         rmf_dir = globaldir + self.vars["rmf_dir"]
         pdb_dir = globaldir + self.vars["best_pdb_dir"]

         if self.vars["do_clean_first"]:
             # Cleaning is not implemented; the flag is accepted but ignored.
             pass

         if self.vars["do_create_directories"]:
             self._make_directory(globaldir)
             self._make_directory(rmf_dir)
             if not self.is_multi_state:
                 self._make_directory(pdb_dir)
             else:
                 for n in range(self.vars["number_of_states"]):
                     self._make_directory(pdb_dir + "/" + str(n))

         # ---------------------------------------------------------------------

         sw = IMP.pmi.tools.Stopwatch()
         self.output_objects.append(sw)

         print("Setting up stat file")
         output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
         low_temp_stat_file = globaldir + \
             self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
         output.init_stat2(low_temp_stat_file,
                           self.output_objects,
                           extralabels=["rmf_file", "rmf_frame_index"])

         print("Setting up replica stat file")
         replica_stat_file = globaldir + \
             self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
         output.init_stat2(replica_stat_file, [rex], extralabels=["score"])

         print("Setting up best pdb files")
         if self.vars["number_of_best_scoring_models"] > 0:
             if not self.is_multi_state:
                 output.init_pdb_best_scoring(
                     pdb_dir + "/" + self.vars["best_pdb_name_suffix"],
                     self.root_hier,
                     self.vars["number_of_best_scoring_models"],
                     replica_exchange=True)
             else:
                 for n in range(self.vars["number_of_states"]):
                     output.init_pdb_best_scoring(
                         pdb_dir + "/" + str(n) + "/" +
                         self.vars["best_pdb_name_suffix"],
                         self.root_hiers[n],
                         self.vars["number_of_best_scoring_models"],
                         replica_exchange=True)

         # ---------------------------------------------

         # Always build a fresh list so that appending the EM density never
         # modifies self.root_hiers.
         if self.is_multi_state:
             output_hierarchies = list(self.root_hiers)
         else:
             output_hierarchies = [self.root_hier]
         if self.em_object_for_rmf is not None:
             output_hierarchies.append(
                 self.em_object_for_rmf.get_density_as_hierarchy())

         # ----------------------------------------------

         if self.vars["write_initial_rmf"]:
             print("Setting up and writing initial rmf coordinate file")
             init_rmf_name = globaldir + self.vars["initial_rmf_name_suffix"] \
                 + "." + str(myindex) + ".rmf3"
             output.init_rmf(init_rmf_name, output_hierarchies)
             if self.crosslink_restraints:
                 output.add_restraints_to_rmf(
                     init_rmf_name,
                     self.crosslink_restraints)
             output.write_rmf(init_rmf_name)
             output.close_rmf(init_rmf_name)

         # ----------------------------------------------

         print("Setting up production rmf files")

         rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
         output.init_rmf(rmfname, output_hierarchies)

         if self.crosslink_restraints:
             output.add_restraints_to_rmf(rmfname, self.crosslink_restraints)

         ntimes_at_low_temp = 0

         if myindex == 0:
             self.show_info()

         for i in range(self.vars["number_of_frames"]):
             for _round in range(self.vars["num_sample_rounds"]):
                 if sampler_mc is not None:
                     sampler_mc.optimize(self.vars["monte_carlo_steps"])
                 if sampler_md is not None:
                     sampler_md.optimize(self.vars["molecular_dynamics_steps"])
             score = self.model.evaluate(False)
             output.set_output_entry("score", score)

             my_temp_index = int(rex.get_my_temp() * temp_index_factor)

             # Coordinates and the main stat file are only written while this
             # replica sits at the lowest temperature.
             if min_temp_index == my_temp_index:
                 print("--- frame %s score %s " % (str(i), str(score)))

                 if i % self.vars["nframes_write_coordinates"] == 0:
                     print('--- writing coordinates')
                     if self.vars["number_of_best_scoring_models"] > 0:
                         output.write_pdb_best_scoring(score)
                     output.write_rmf(rmfname)
                     output.set_output_entry("rmf_file", rmfname)
                     # NOTE(review): this counts low temperature visits, not
                     # frames actually written to the RMF file; the two differ
                     # when nframes_write_coordinates > 1 -- confirm intent.
                     output.set_output_entry("rmf_frame_index",
                                             ntimes_at_low_temp)
                 else:
                     output.set_output_entry("rmf_file", rmfname)
                     output.set_output_entry("rmf_frame_index", '-1')
                 output.write_stat2(low_temp_stat_file)
                 ntimes_at_low_temp += 1

             output.write_stat2(replica_stat_file)
             rex.swap_temp(i, score)


 # -----------------------------------------------------------------------


 def BuildModel0(
     m,
     data,
     resolutions=None,
     missing_bead_size=20,
         residue_per_gaussian=None):
     '''
     Build a Representation with one component per subunit.

     The macro constructs a component for each subunit (no splitting, nothing
     fancy). You can pass the resolutions and the bead size for the missing
     residue regions.

     @param m                 The IMP model
     @param data              One tuple per component, laid out as shown below
     @param resolutions       Resolutions passed to autobuild_model()
                              (defaults to [1, 10])
     @param missing_bead_size Bead size used for the missing residue regions
     @param residue_per_gaussian Accepted but not used by this macro
     @return the populated IMP.pmi.representation.Representation

     To use this macro, you must provide the following data structure:

     Component  pdbfile    chainid  rgb color     fastafile     sequence id
                                                                       in fastafile

 data = [("Rpb1",     pdbfile,   "A",     0.00000000,  (fastafile,    0)),
       ("Rpb2",     pdbfile,   "B",     0.09090909,  (fastafile,    1)),
       ("Rpb3",     pdbfile,   "C",     0.18181818,  (fastafile,    2)),
       ("Rpb4",     pdbfile,   "D",     0.27272727,  (fastafile,    3)),
       ("Rpb5",     pdbfile,   "E",     0.36363636,  (fastafile,    4)),
       ("Rpb6",     pdbfile,   "F",     0.45454545,  (fastafile,    5)),
       ("Rpb7",     pdbfile,   "G",     0.54545455,  (fastafile,    6)),
       ("Rpb8",     pdbfile,   "H",     0.63636364,  (fastafile,    7)),
       ("Rpb9",     pdbfile,   "I",     0.72727273,  (fastafile,    8)),
       ("Rpb10",    pdbfile,   "L",     0.81818182,  (fastafile,    9)),
       ("Rpb11",    pdbfile,   "J",     0.90909091,  (fastafile,   10)),
       ("Rpb12",    pdbfile,   "K",     1.00000000,  (fastafile,   11))]

     '''
     # A fresh list per call: the list is handed to autobuild_model(), so a
     # shared mutable default could leak changes between calls.
     if resolutions is None:
         resolutions = [1, 10]

     r = IMP.pmi.representation.Representation(m)

     # sequence ids already read, keyed by FASTA file name, so that a file
     # shared by several components is only parsed once
     fasta_ids_by_file = {}

     for d in data:
         # retrieve the information from the data structure
         component_name = d[0]
         pdb_file = d[1]
         chain_id = d[2]
         color_id = d[3]
         fasta_file = d[4][0]
         fasta_file_id = d[4][1]

         if fasta_file not in fasta_ids_by_file:
             fasta_ids_by_file[fasta_file] = \
                 IMP.pmi.tools.get_ids_from_fasta_file(fasta_file)
         fastids = fasta_ids_by_file[fasta_file]

         r.create_component(component_name,
                            color=color_id)

         r.add_component_sequence(component_name,
                                  fasta_file,
                                  id=fastids[fasta_file_id])

         hierarchies = r.autobuild_model(component_name,
                                         pdb_file,
                                         chain_id,
                                         resolutions=resolutions,
                                         missingbeadsize=missing_bead_size)

         r.show_component_table(component_name)

         r.set_rigid_bodies([component_name])

         r.set_chain_of_super_rigid_bodies(
             hierarchies,
             min_length=2,
             max_length=2)

         r.setup_component_sequence_connectivity(component_name, resolution=1)
         r.setup_component_geometry(component_name)

     r.setup_bonds()
     # put it at the end of rigid bodies
     r.set_floppy_bodies()

     # set current coordinates as reference for RMSD calculation
     r.set_current_coordinates_as_reference_for_rmsd("Reference")

     return r


 # ----------------------------------------------------------------------


 class BuildModel(object):
     """A macro to build a Representation based on a Topology and lists of movers

     @param model The IMP model
     @param component_topologies List of IMP.pmi.topology.ComponentTopology items
     @param list_of_rigid_bodies List of lists of domain names from the components.
     @param list_of_super_rigid_bodies List of lists of domain names from the components.
     @param chain_of_super_rigid_bodies List of lists of domain names from the components
                                        Choices can only be from the same molecule
     @param sequence_connectivity_scale For scaling the connectivity restraint
     @param add_each_domain_as_rigid_body That way you don't have to put all of them in the list
                               (NOTE: accepted, but not read by this constructor)
     @param force_create_gmm_files If True, will sample and create GMMs no matter what.
                               If False, will only sample if the files don't exist.
                               If number of Gaussians is zero, won't do anything.
     """

     def __init__(self,
                  model,
                  component_topologies,
                  list_of_rigid_bodies=None,
                  list_of_super_rigid_bodies=None,
                  chain_of_super_rigid_bodies=None,
                  sequence_connectivity_scale=4.0,
                  add_each_domain_as_rigid_body=False,
                  force_create_gmm_files=False):
         """Build the representation, its densities, restraints and movers.

         Raises ValueError if a mover list names a domain that is not in
         component_topologies.
         """
         self.m = model
         self.simo = IMP.pmi.representation.Representation(
             self.m,
             upperharmonic=True,
             disorderedlength=False)

         # None stands for "no movers of this kind"; copying also avoids
         # sharing a mutable default between calls.
         list_of_rigid_bodies = list(list_of_rigid_bodies or [])
         list_of_super_rigid_bodies = list(list_of_super_rigid_bodies or [])
         chain_of_super_rigid_bodies = list(chain_of_super_rigid_bodies or [])

         if not list_of_rigid_bodies:
             print("WARNING: No list of rigid bodies inputted to build_model()")
         if not list_of_super_rigid_bodies:
             print("WARNING: No list of super rigid bodies inputted to build_model()")
         if not chain_of_super_rigid_bodies:
             print("WARNING: No chain of super rigid bodies inputted to build_model()")

         all_movers = (list_of_rigid_bodies + list_of_super_rigid_bodies
                       + chain_of_super_rigid_bodies)
         all_dnames = set(d for sublist in all_movers for d in sublist)
         all_available = set(c.domain_name for c in component_topologies)
         if not all_dnames.issubset(all_available):
             raise ValueError("All requested movers must reference domain "
                              "names in the component topologies")

         # domain name -> all hierarchies of the domain (structure + densities)
         self.domain_dict = {}
         # domain name -> density hierarchies (plus beads) of the domain
         self.resdensities = {}

         for c in component_topologies:
             comp_name = c.name
             hier_name = c.domain_name
             color = c.color
             fasta_file = c.fasta_file
             fasta_id = c.fasta_id
             pdb_name = c.pdb_file
             chain_id = c.chain
             res_range = c.residue_range
             offset = c.pdb_offset
             bead_size = c.bead_size
             em_num_components = c.em_residues_per_gaussian
             em_txt_file_name = c.gmm_file
             em_mrc_file_name = c.mrc_file

             if comp_name not in self.simo.get_component_names():
                 self.simo.create_component(comp_name, color=0.0)
                 self.simo.add_component_sequence(comp_name, fasta_file, fasta_id)

             # create hierarchy (adds resolutions, beads) with autobuild and
             # optionally add EM data
             if em_num_components == 0:
                 read_em_files = False
                 include_res0 = False
             else:
                 # GMMs are (re)computed when the file is missing or when
                 # forced; computing them needs the resolution 0 particles.
                 must_create_gmm = (not os.path.isfile(em_txt_file_name)) \
                     or force_create_gmm_files
                 read_em_files = not must_create_gmm
                 include_res0 = must_create_gmm

             outhier = self.autobuild(self.simo, comp_name, pdb_name, chain_id,
                                      res_range, include_res0,
                                      beadsize=bead_size,
                                      color=color, offset=offset)

             if em_num_components != 0:
                 if read_em_files:
                     print("will read GMM files")
                 else:
                     print("will calculate GMMs")

                 dens_hier, beads = self.create_density(
                     self.simo, comp_name, outhier, em_txt_file_name,
                     em_mrc_file_name, em_num_components, read_em_files)
                 self.simo.add_all_atom_densities(comp_name, hierarchies=beads)
                 dens_hier += beads
             else:
                 dens_hier = []

             self.resdensities[hier_name] = dens_hier
             self.domain_dict[hier_name] = outhier + dens_hier

         # setup basic restraints
         for c in self.simo.get_component_names():
             self.simo.setup_component_sequence_connectivity(
                 c, scale=sequence_connectivity_scale)
             self.simo.setup_component_geometry(c)

         # create movers; each group iterates over its OWN member list
         for rblist in list_of_rigid_bodies:
             rb = []
             for rbmember in rblist:
                 rb.extend(self.domain_dict[rbmember])
             self.simo.set_rigid_body_from_hierarchies(rb)

         for srblist in list_of_super_rigid_bodies:
             srb = []
             for srbmember in srblist:
                 srb.extend(self.domain_dict[srbmember])
             self.simo.set_super_rigid_body_from_hierarchies(srb)

         for clist in chain_of_super_rigid_bodies:
             crb = []
             for crbmember in clist:
                 crb.extend(self.domain_dict[crbmember])
             self.simo.set_chain_of_super_rigid_bodies(crb, 2, 3)

         self.simo.set_floppy_bodies()
         self.simo.setup_bonds()

     def get_representation(self):
         '''Return the Representation object'''
         return self.simo

     def get_density_hierarchies(self, hier_name_list):
         '''Return the density hierarchies of the named domains as one list.

         @param hier_name_list List of domain (hierarchy) names
         '''
         dens_hier_list = []
         for hn in hier_name_list:
             print(hn)
             dens_hier_list += self.resdensities[hn]
         return dens_hier_list

     def set_gmm_models_directory(self, directory_name):
         '''Remember the directory that holds the GMM model files.'''
         self.gmm_models_directory = directory_name

     def get_pdb_bead_bits(self, hierarchy):
         '''Sort hierarchies by name into (pdb, bead, helix) lists.

         A hierarchy goes into every list whose marker ("_pdb", "_bead",
         "_helix") occurs in its name.
         '''
         pdbbits = []
         beadbits = []
         helixbits = []
         for h in hierarchy:
             name = h.get_name()
             if "_pdb" in name:
                 pdbbits.append(h)
             if "_bead" in name:
                 beadbits.append(h)
             if "_helix" in name:
                 helixbits.append(h)
         return (pdbbits, beadbits, helixbits)

     def scale_bead_radii(self, nresidues, scale):
         '''Rescale the radius of every bead, each bead exactly once.

         The factor is linear in the number of residues of the bead:
         (1 - scale) / (1 - nresidues) * num_residues + 1.
         '''
         scaled_beads = set()
         for h in self.domain_dict:
             (pdbbits, beadbits, helixbits) = self.get_pdb_bead_bits(
                 self.domain_dict[h])
             slope = (1.0 - scale) / (1.0 - float(nresidues))

             for b in beadbits:
                 # a bead may show up in several domains; remember the ones
                 # already handled because otherwise we'd scale more than once
                 if b in scaled_beads:
                     continue
                 scaled_beads.add(b)
                 radius = IMP.core.XYZR(b).get_radius()
                 num_residues = len(IMP.pmi.tools.get_residue_indexes(b))
                 scale_factor = slope * float(num_residues) + 1.0
                 print(scale_factor)
                 new_radius = scale_factor * radius
                 IMP.core.XYZR(b).set_radius(new_radius)
                 print(b.get_name())
                 print("particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius))

     def create_density(self, simo, compname, comphier, txtfilename,
                        mrcfilename, num_components, read=True):
         '''Generate (or read) the densities used by the EM restraint.

         @param simo          The Representation to add the densities to
         @param compname      Component name
         @param comphier      Hierarchies of the component
         @param txtfilename   GMM text file (read if read is True, else written)
         @param mrcfilename   Density map written when the GMM is computed
         @param num_components Number of Gaussians when reading; when
                              computing, residues per Gaussian
         @param read          Read existing GMM files instead of computing them
         @return (density hierarchies, bead hierarchies of comphier)
         '''
         (pdbbits, beadbits, helixbits) = self.get_pdb_bead_bits(comphier)

         # get the number of residues from the pdb (and helix) bits
         res_ind = []
         for pb in pdbbits + helixbits:
             for p in IMP.core.get_leaves(pb):
                 res_ind += IMP.pmi.tools.get_residue_indexes(p)
         number_of_residues = len(set(res_ind))

         outhier = []
         if read:
             if pdbbits:
                 outhier += simo.add_component_density(
                     compname,
                     pdbbits,
                     num_components=num_components,
                     resolution=0,
                     inputfile=txtfilename)
             if helixbits:
                 outhier += simo.add_component_density(
                     compname,
                     helixbits,
                     num_components=num_components,
                     resolution=1,
                     inputfile=txtfilename)
         elif pdbbits or helixbits:
             # Compute the Gaussian count ONCE from the caller's value, so the
             # helix branch does not divide by an already divided number.
             num_gaussians = number_of_residues // abs(num_components)
             if pdbbits:
                 outhier += simo.add_component_density(
                     compname,
                     pdbbits,
                     num_components=num_gaussians,
                     resolution=0,
                     outputfile=txtfilename,
                     outputmap=mrcfilename,
                     multiply_by_total_mass=True)
             if helixbits:
                 outhier += simo.add_component_density(
                     compname,
                     helixbits,
                     num_components=num_gaussians,
                     resolution=1,
                     outputfile=txtfilename,
                     outputmap=mrcfilename,
                     multiply_by_total_mass=True)

         return outhier, beadbits

     def autobuild(self, simo, comname, pdbname, chain, resrange,
                   include_res0=False, beadsize=5, color=0.0, offset=0):
         '''Build the hierarchies of one domain and return them.

         pdbname selects how the domain is built:
           None          - a necklace of beads over the whole sequence
           "IDEAL_HELIX" - an ideal helix over resrange
           "BEADS"       - a necklace of beads over resrange
           anything else - read from that PDB file with autobuild_model();
                           a resrange ending in -1 runs to the sequence end
         '''
         # The keywords are compared by value: a name read from a file is
         # equal to, but not the same object as, the literal.
         if pdbname is None:
             seq_len = len(simo.sequence_dict[comname])
             return simo.add_component_necklace(comname,
                                                begin=1,
                                                end=seq_len,
                                                length=beadsize)

         if pdbname == "IDEAL_HELIX":
             return simo.add_component_ideal_helix(comname,
                                                   resolutions=[1, 10],
                                                   resrange=resrange,
                                                   color=color,
                                                   show=False)

         if pdbname == "BEADS":
             return simo.add_component_necklace(comname, resrange[0],
                                                resrange[1], beadsize,
                                                color=color)

         if resrange[-1] == -1:
             resrange = (resrange[0], len(simo.sequence_dict[comname]))
         # resolution 0 is only needed when the GMMs are to be computed
         resolutions = [0, 1, 10] if include_res0 else [1, 10]
         return simo.autobuild_model(comname,
                                     pdbname=pdbname,
                                     chain=chain,
                                     resrange=resrange,
                                     resolutions=resolutions,
                                     offset=offset,
                                     color=color,
                                     missingbeadsize=beadsize)


 class BuildModel1(object):
     """Deprecated building macro - use BuildModel()

     @param representation The PMI representation"""

     def __init__(self, representation):
         self.simo=representation
         # directory used to build the default GMM file names in build_model()
         self.gmm_models_directory="."

     def set_gmm_models_directory(self,directory_name):
         """Set the directory used for the default GMM .txt/.mrc file names."""
         self.gmm_models_directory=directory_name

     def build_model(self,data_structure,sequence_connectivity_scale=4.0):
         """Create model.

         @param data_structure List of lists containing these entries:
              comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
              res_range, read_em_files, bead_size, rb, super_rb,
              em_num_components, em_txt_file_name, em_mrc_file_name,
              chain_of_super_rb

              - res_range may carry a third element, used as the residue
                offset (0 when absent)
              - read_em_files None means no density is created for the entry
              - em_txt_file_name / em_mrc_file_name equal to " " are replaced
                by <gmm_models_directory>/<hier_name>.txt (resp. .mrc)
              - rb is a key grouping entries into one rigid body; super_rb and
                chain_of_super_rb are iterables of such keys (or None)
         @param sequence_connectivity_scale Scale passed to
              setup_component_sequence_connectivity() for every component
         """
         self.domain_dict={}
         self.resdensities={}
         super_rigid_bodies={}
         chain_super_rigid_bodies={}
         rigid_bodies={}

         for d in data_structure:
             comp_name         = d[0]
             hier_name         = d[1]
             color             = d[2]
             fasta_file        = d[3]
             fasta_id          = d[4]
             pdb_name          = d[5]
             chain_id          = d[6]
             res_range         = d[7][0:2]
             # the offset is an optional third element of the residue range
             if len(d[7])>2:
                 offset        = d[7][2]
             else:
                 offset        = 0
             read_em_files     = d[8]
             bead_size         = d[9]
             rb                = d[10]
             super_rb          = d[11]
             em_num_components = d[12]
             em_txt_file_name  = d[13]
             em_mrc_file_name  = d[14]
             chain_of_super_rb = d[15]

             # a component is created once, the first time its name shows up
             if comp_name not in self.simo.get_component_names():
                 self.simo.create_component(comp_name,color=0.0)
                 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
             outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)

             if read_em_files is not None:
                 # " " is the placeholder for "use the default file name";
                 # compare by value, identity is unreliable for strings
                 if em_txt_file_name==" ":
                     em_txt_file_name=self.gmm_models_directory+"/"+hier_name+".txt"
                 if em_mrc_file_name==" ":
                     em_mrc_file_name=self.gmm_models_directory+"/"+hier_name+".mrc"

                 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
                 self.simo.add_all_atom_densities(comp_name, hierarchies=beads)
                 dens_hier+=beads
             else:
                 dens_hier=[]

             self.resdensities[hier_name]=dens_hier
             self.domain_dict[hier_name]=outhier+dens_hier
             domain=self.domain_dict[hier_name]

             # collect the hierarchies of this entry under every body key it names
             if rb is not None:
                 rigid_bodies.setdefault(rb,[]).extend(domain)

             if super_rb is not None:
                 for k in super_rb:
                     super_rigid_bodies.setdefault(k,[]).extend(domain)

             if chain_of_super_rb is not None:
                 for k in chain_of_super_rb:
                     chain_super_rigid_bodies.setdefault(k,[]).extend(domain)

         self.rigid_bodies=rigid_bodies

         for c in self.simo.get_component_names():
             self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
             self.simo.setup_component_geometry(c)

         for rb in rigid_bodies:
             self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])

         for k in super_rigid_bodies:
             self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])

         for k in chain_super_rigid_bodies:
             self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)

         self.simo.set_floppy_bodies()
         self.simo.setup_bonds()

     def get_density_hierarchies(self,hier_name_list):
         """Return the concatenated density hierarchies of the given names.

         @param hier_name_list Hierarchy names as used in build_model()
         @return a new list; each name is also printed as it is processed
         """
         dens_hier_list=[]
         for hn in hier_name_list:
             print(hn)
             dens_hier_list+=self.resdensities[hn]
         return dens_hier_list

     def get_pdb_bead_bits(self,hierarchy):
         """Split hierarchies by name into (pdbbits, beadbits, helixbits).

         A hierarchy is put in every list whose tag ("_pdb", "_bead",
         "_helix") occurs in its get_name().
         """
         pdbbits=[]
         beadbits=[]
         helixbits=[]
         for h in hierarchy:
             name=h.get_name()
             if "_pdb" in name:
                 pdbbits.append(h)
             if "_bead" in name:
                 beadbits.append(h)
             if "_helix" in name:
                 helixbits.append(h)
         return (pdbbits,beadbits,helixbits)

     def scale_bead_radii(self,nresidues,scale):
         """Rescale the radius of every bead found in self.domain_dict.

         Each bead radius is multiplied by slope*num_residues+1.0, with
         slope=(1.0-scale)/(1.0-nresidues) and num_residues the number of
         residue indexes of the bead. A bead shared by several domains is
         scaled only once.

         @param nresidues Reference number of residues (must differ from 1)
         @param scale     Scale entering the slope above
         """
         scaled_beads=set()
         for h in self.domain_dict:
             (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
             slope=(1.0-scale)/(1.0-float(nresidues))

             for b in beadbits:
                 # skip beads already handled, otherwise we'd scale more than once
                 if b in scaled_beads:
                     continue
                 scaled_beads.add(b)
                 radius=IMP.core.XYZR(b).get_radius()
                 num_residues=len(IMP.pmi.tools.get_residue_indexes(b))
                 scale_factor=slope*float(num_residues)+1.0
                 print(scale_factor)
                 new_radius=scale_factor*radius
                 IMP.core.XYZR(b).set_radius(new_radius)
                 print(b.get_name())
                 print("particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius))

     def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
         """Generate (or read back) the densities used by the EM restraint.

         @param simo           Representation object providing add_component_density()
         @param compname       Component name
         @param comphier       Hierarchies of the component; split by name
                               with get_pdb_bead_bits()
         @param txtfilename    GMM text file, read if read is true, written otherwise
         @param mrcfilename    Output map, only used when writing
         @param num_components Number of gaussians; when writing, a negative
                               value means number_of_residues // abs(value)
         @param read           True to read a previously computed GMM
         @return (density hierarchies, bead hierarchies of comphier)
         """
         (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)

         # number of distinct residues covered by the pdb and helix bits
         res_ind=[]
         for pb in pdbbits+helixbits:
             for p in IMP.core.get_leaves(pb):
                 res_ind+=IMP.pmi.tools.get_residue_indexes(p)
         number_of_residues=len(set(res_ind))

         if not read and num_components<0:
             # negative: derive the number of gmm components from the number
             # of residues; floor division keeps it an integer count
             num_components=number_of_residues//abs(num_components)

         outhier=[]
         # pdb bits are simulated at resolution 0, helix bits at resolution 1
         for bits,resolution in ((pdbbits,0),(helixbits,1)):
             if len(bits)==0:
                 continue
             if read:
                 # read what was calculated before
                 outhier+=simo.add_component_density(compname,
                                          bits,
                                          num_components=num_components,
                                          resolution=resolution,
                                          inputfile=txtfilename)
             else:
                 # do the calculation and output the gmm text file and the mrc
                 outhier+=simo.add_component_density(compname,
                                          bits,
                                          num_components=num_components,
                                          resolution=resolution,
                                          outputfile=txtfilename,
                                          outputmap=mrcfilename,
                                          multiply_by_total_mass=True)

         return outhier,beadbits

     def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
         """Build the hierarchies of one component, choosing the builder from pdbname.

         @param simo     Representation object
         @param comname  Component name (key of simo.sequence_dict)
         @param pdbname  None -> bead necklace over the whole sequence;
                         "IDEAL_HELIX" -> ideal helix over resrange;
                         "BEADS" -> bead necklace over resrange;
                         anything else is passed to autobuild_model()
         @param chain    Chain id, only used when building from a PDB
         @param resrange (first, last) residues; when building from a PDB a
                         last value of -1 means "to the end of the sequence"
         @param read     When building from a PDB, resolution 0 is also
                         requested only if read==False
         @param beadsize Bead length for necklaces / missing PDB regions
         @param color    Color passed to the builder (not used when pdbname is None)
         @param offset   Offset passed to autobuild_model()
         @return whatever the selected simo builder returns
         """
         # the keywords are compared with ==: identity ("is") only works for
         # interned strings and misroutes names parsed at run time
         if pdbname is None:
             seq_len=len(simo.sequence_dict[comname])
             return simo.add_component_necklace(comname,
                                   begin=1,
                                   end=seq_len,
                                   length=beadsize)

         if pdbname=="IDEAL_HELIX":
             return simo.add_component_ideal_helix(comname,
                                                 resolutions=[1,10],
                                                 resrange=resrange,
                                                 color=color,
                                                 show=False)

         if pdbname=="BEADS":
             return simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)

         # remaining case: pdbname is a structure file
         if resrange[-1]==-1:
             resrange=(resrange[0],len(simo.sequence_dict[comname]))
         # deliberately ==False: None (no EM data) must select [1,10]
         if read==False:
             resolutions=[0,1,10]
         else:
             resolutions=[1,10]
         return simo.autobuild_model(comname,
                          pdbname=pdbname,
                          chain=chain,
                          resrange=resrange,
                          resolutions=resolutions,
                          offset=offset,
                          color=color,
                          missingbeadsize=beadsize)


 # ----------------------------------------------------------------------


 class AnalysisReplicaExchange0(object):

     """A macro for running all the basic operations of analysis.

     Includes clustering, precision analysis, and making ensemble density maps.

     A number of plots are also supported.

     @param model                           The IMP model

     @param stat_file_name_suffix

     @param merge_directories               The directories containing output files

     @param best_pdb_name_suffix

     @param do_clean_first

     @param do_create_directories

     @param global_output_directory          Where everything is

     @param replica_stat_file_suffix

     @param global_analysis_result_directory

     """

     def __init__(self, model,

                  merge_directories=["./"],

                  stat_file_name_suffix="stat",

                  best_pdb_name_suffix="model",

                  do_clean_first=True,

                  do_create_directories=True,

                  global_output_directory="output/",

                  replica_stat_file_suffix="stat_replica",

                  global_analysis_result_directory="./analysis/"):


         try:

             from mpi4py import MPI

             self.comm = MPI.COMM_WORLD

             self.rank = self.comm.Get_rank()

             self.number_of_processes = self.comm.size

         except ImportError:

             self.rank = 0

             self.number_of_processes = 1


         self.model = model

         stat_dir = global_output_directory

         self.stat_files = []

         # it contains the position of the root directories

         for rd in merge_directories:

             stat_files = glob.glob(rd + "/" + stat_dir + "/stat.*.out")

             if len(stat_files)==0:

                 print "WARNING: no stat files found in",rd + "/" + stat_dir + "/stat.*.out"

             self.stat_files += stat_files


     def get_modeling_trajectory(self,
                                 score_key="SimplifiedModel_Total_Score_None",
                                 rmf_file_key="rmf_file",
                                 rmf_file_frame_key="rmf_frame_index",
                                 outputdir="./",
                                 get_every=1,
                                 nframes_trajectory=10000):
         """ Get a trajectory of the modeling run, for generating demonstrative movies

         The scores of the models read from self.stat_files are assigned to
         nframes_trajectory bins whose centers decay exponentially from the
         maximum to the minimum score; for every bin the model whose score is
         closest to the center is kept. The binned scores and the matching
         model indexes are printed (None / -1 mark empty bins); nothing is
         returned and outputdir is currently not used.

         @param score_key                           The score for ranking models
         @param rmf_file_key                        Key pointing to RMF filename
         @param rmf_file_frame_key                  Key pointing to RMF frame number
         @param outputdir                           The local output directory used in the run
         @param get_every                           Extract every nth frame
         @param nframes_trajectory                  Total number of frames of the trajectory
         """
         import math

         trajectory_models = IMP.pmi.io.get_trajectory_models(self.stat_files,
                                                  score_key,
                                                  rmf_file_key,
                                                  rmf_file_frame_key,
                                                  get_every)
         # element 2 holds the scores; build a real list because it is
         # traversed several times below
         score_list=[float(s) for s in trajectory_models[2]]
         if len(score_list)==0:
             raise ValueError("no models found in the stat files: cannot build a trajectory")

         max_score=max(score_list)
         min_score=min(score_list)

         # bin centers: max_score for i=0, decaying exponentially to min_score
         bins=[(max_score-min_score)*math.exp(-float(i))+min_score for i in range(nframes_trajectory)]
         binned_scores=[None]*nframes_trajectory
         binned_model_indexes=[-1]*nframes_trajectory

         for model_index,s in enumerate(score_list):
             # closest bin center; the first one wins in case of ties
             bin_index=min(range(len(bins)), key=lambda i: abs(s-bins[i]))
             current=binned_scores[bin_index]
             # keep the model closest to the bin center
             if current is None or abs(s-bins[bin_index]) < abs(current-bins[bin_index]):
                 binned_scores[bin_index]=s
                 binned_model_indexes[bin_index]=model_index

         print(binned_scores)
         print(binned_model_indexes)


     def clustering(self,

                    score_key="SimplifiedModel_Total_Score_None",

                    rmf_file_key="rmf_file",

                    rmf_file_frame_key="rmf_frame_index",

                    prefiltervalue=None,

                    feature_keys=[],

                    outputdir="./",

                    alignment_components=None,

                    number_of_best_scoring_models=10,

                    rmsd_calculation_components=None,

                    distance_matrix_file=None,

                    load_distance_matrix_file=False,

                    skip_clustering=False,

                    number_of_clusters=1,

                    display_plot=False,

                    exit_after_display=True,

                    get_every=1,

                    first_and_last_frames=None,

                    density_custom_ranges=None,

                    write_pdb_with_centered_coordinates=False,

                    voxel_size=5.0):

         """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps

         @param score_key                           The score for ranking models

         @param rmf_file_key                        Key pointing to RMF filename

         @param rmf_file_frame_key                  Key pointing to RMF frame number

         @param prefiltervalue                      Only include frames where the score key is below this value

         @param feature_keys                        Keywords for which you want to calculate average,

                                                     medians, etc,

         @param outputdir                           The local output directory used in the run

         @param alignment_components                List of tuples for aligning the structures

                                                    e.g. ["Rpb1", (20,100,"Rpb2"), .....]

         @param number_of_best_scoring_models       Num models to keep per run

         @param rmsd_calculation_components         List of tuples for calculating RMSD

                                                    e.g. ["Rpb1", (20,100,"Rpb2"), .....]

         @param distance_matrix_file                Where to store/read the distance matrix

         @param load_distance_matrix_file           Try to load the distance matrix file

         @param skip_clustering                     Just extract the best scoring models and save the pdbs

         @param number_of_clusters                  Number of k-means clusters

         @param display_plot                        Display the distance matrix

         @param exit_after_display                  Exit after displaying distance matrix

         @param get_every                           Extract every nth frame

         @param first_and_last_frames               A tuple with the first and last frames to be

                                                    analyzed. Values are percentages!

                                                    Default: get all frames

         @param density_custom_ranges               List of tuples or strings for density calculation

                                                    e.g. ["Rpb1", (20,100,"Rpb2"), .....]

         @param write_pdb_with_centered_coordinates

         @param voxel_size                          Used for the density output

         """


         if self.rank==0:

             try:

                 os.mkdir(outputdir)

             except:

                 pass


         if not load_distance_matrix_file:

             if len(self.stat_files)==0: print "ERROR: no stat file found in the given path"; return

             my_stat_files=IMP.pmi.tools.chunk_list_into_segments(

                 self.stat_files,self.number_of_processes)[self.rank]

             best_models = IMP.pmi.io.get_best_models(my_stat_files,

                                                      score_key,

                                                      feature_keys,

                                                      rmf_file_key,

                                                      rmf_file_frame_key,

                                                      prefiltervalue,

                                                      get_every)

             rmf_file_list=best_models[0]

             rmf_file_frame_list=best_models[1]

             score_list=best_models[2]

             feature_keyword_list_dict=best_models[3]


 # ------------------------------------------------------------------------

 # collect all the files and scores

 # ------------------------------------------------------------------------


             if self.number_of_processes > 1:

                 score_list = IMP.pmi.tools.scatter_and_gather(score_list)

                 rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)

                 rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(

                     rmf_file_frame_list)

                 for k in feature_keyword_list_dict:

                     feature_keyword_list_dict[k] = IMP.pmi.tools.scatter_and_gather(

                         feature_keyword_list_dict[k])


             # sort by score and get the best scoring ones

             score_rmf_tuples = zip(score_list,

                                    rmf_file_list,

                                    rmf_file_frame_list,

                                    range(len(score_list)))


             # keep subset of frames if requested

             if first_and_last_frames is not None:

                 nframes = len(score_rmf_tuples)

                 first_frame = int(first_and_last_frames[0] * nframes)

                 last_frame = int(first_and_last_frames[1] * nframes)

                 if last_frame > len(score_rmf_tuples):

                     last_frame = -1

                 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]


             # sort RMFs by the score_key in ascending order, and store the rank

             best_score_rmf_tuples = sorted(score_rmf_tuples,

                                            key=lambda x: float(x[0]))[:number_of_best_scoring_models]

             best_score_rmf_tuples=[t+(n,) for n,t in enumerate(best_score_rmf_tuples)]


             # sort the feature scores in the same way

             best_score_feature_keyword_list_dict = defaultdict(list)

             for tpl in best_score_rmf_tuples:

                 index = tpl[3]

                 for f in feature_keyword_list_dict:

                     best_score_feature_keyword_list_dict[f].append(

                         feature_keyword_list_dict[f][index])


             my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(

                 best_score_rmf_tuples,

                 self.number_of_processes)[self.rank]


 # ------------------------------------------------------------------------

 # optionally don't compute distance matrix or cluster, just write top files

 # ------------------------------------------------------------------------

             if skip_clustering:

                 dircluster=os.path.join(outputdir,"all_models."+str(n))

                 try:

                     os.mkdir(outputdir)

                 except:

                     pass

                 try:

                     os.mkdir(dircluster)

                 except:

                     pass

                 clusstat=open(os.path.join(dircluster,"stat."+str(self.rank)+".out"),"w")

                 for cnt,tpl in enumerate(my_best_score_rmf_tuples):

                     rmf_name=tpl[1]

                     rmf_frame_number=tpl[2]

                     tmp_dict={}

                     index=tpl[4]

                     for key in best_score_feature_keyword_list_dict:

                         tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]


                     prot=IMP.pmi.analysis.get_hier_from_rmf(self.model,rmf_frame_number,rmf_name)

                     if not prot:

                         continue


                     o=IMP.pmi.output.Output()

                     out_pdb_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".pdb")

                     out_rmf_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".rmf")

                     o.init_pdb(out_pdb_fn,prot)

                     o.write_pdb(out_pdb_fn,

                                 translate_to_geometric_center=write_pdb_with_centered_coordinates)


                     tmp_dict["local_pdb_file_name"]=os.path.basename(out_pdb_fn)

                     tmp_dict["rmf_file_full_path"]=rmf_name

                     tmp_dict["local_rmf_file_name"]=os.path.basename(out_rmf_fn)

                     tmp_dict["local_rmf_frame_number"]=0


                     clusstat.write(str(tmp_dict)+"\n")

                     o.init_rmf(out_rmf_fn,[prot])

                     o.write_rmf(out_rmf_fn)

                     o.close_rmf(out_rmf_fn)

                 return


 #-------------------------------------------------------------

 # read the coordinates

 # ------------------------------------------------------------

             rmsd_weights = IMP.pmi.io.get_bead_sizes(self.model,

                                                      my_best_score_rmf_tuples[0],

                                                      rmsd_calculation_components)

             got_coords = IMP.pmi.io.read_coordinates_of_rmfs(self.model,

                                                              my_best_score_rmf_tuples,

                                                              alignment_components,

                                                              rmsd_calculation_components)


             # note! the coordinates are simple float tuples, NOT decorators, NOT Vector3D,

             # NOR particles, because these object cannot be serialized. We need serialization

             # for the parallel computation based on mpi.

             all_coordinates=got_coords[0]          # dict:key=component name,val=coords per hit

             alignment_coordinates=got_coords[1]    # same as above, limited to alignment bits

             rmsd_coordinates=got_coords[2]         # same as above, limited to RMSD bits

             rmf_file_name_index_dict=got_coords[3] # dictionary with key=RMF, value=score rank

             all_rmf_file_names=got_coords[4]       # RMF file per hit


             # broadcast the coordinates

             if self.number_of_processes > 1:

                 all_coordinates = IMP.pmi.tools.scatter_and_gather(

                     all_coordinates)

                 all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(

                     all_rmf_file_names)

                 rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(

                     rmf_file_name_index_dict)

                 alignment_coordinates=IMP.pmi.tools.scatter_and_gather(

                     alignment_coordinates)

                 rmsd_coordinates=IMP.pmi.tools.scatter_and_gather(

                     rmsd_coordinates)


             if self.rank == 0:

                 # save needed informations in external files

                 self.save_objects(

                     [best_score_feature_keyword_list_dict,

                      rmf_file_name_index_dict],

                     ".macro.pkl")


 # ------------------------------------------------------------------------

 # Calculate distance matrix and cluster

 # ------------------------------------------------------------------------

             print "setup clustering class"

             Clusters = IMP.pmi.analysis.Clustering(rmsd_weights)


             for n, model_coordinate_dict in enumerate(all_coordinates):

                 template_coordinate_dict = {}

                 # let's try to align

                 if alignment_components is not None and len(Clusters.all_coords) == 0:

                     # set the first model as template coordinates

                     Clusters.set_template(alignment_coordinates[n])

                 Clusters.fill(all_rmf_file_names[n], rmsd_coordinates[n])


             print "Global calculating the distance matrix"


             # calculate distance matrix, all against all

             Clusters.dist_matrix()


             # perform clustering and optionally display

             if self.rank == 0:

                 Clusters.do_cluster(number_of_clusters)

                 if display_plot:

                     if self.rank == 0:

                         Clusters.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))

                     if exit_after_display:

                         exit()

                 Clusters.save_distance_matrix_file(file_name=distance_matrix_file)


 # ------------------------------------------------------------------------

 # Alteratively, load the distance matrix from file and cluster that

 # ------------------------------------------------------------------------

         else:

             if self.rank==0:

                 print "setup clustering class"

                 Clusters = IMP.pmi.analysis.Clustering()

                 Clusters.load_distance_matrix_file(file_name=distance_matrix_file)

                 print "clustering with %s clusters" % str(number_of_clusters)

                 Clusters.do_cluster(number_of_clusters)

                 [best_score_feature_keyword_list_dict,

                  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")

                 if display_plot:

                     if self.rank == 0:

                         Clusters.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))

                     if exit_after_display:

                         exit()

         if self.number_of_processes > 1:

             self.comm.Barrier()


 # ------------------------------------------------------------------------

 # now save all informations about the clusters

 # ------------------------------------------------------------------------


         if self.rank == 0:

             print Clusters.get_cluster_labels()

             for n, cl in enumerate(Clusters.get_cluster_labels()):

                 print "rank %s " % str(self.rank)

                 print "cluster %s " % str(n)

                 print "cluster label %s " % str(cl)

                 print Clusters.get_cluster_label_names(cl)


                 # first initialize the Density class if requested


                 if density_custom_ranges:

                     DensModule = IMP.pmi.analysis.GetModelDensity(

                         density_custom_ranges,

                         voxel=voxel_size)


                 dircluster = outputdir + "/cluster." + str(n) + "/"

                 try:

                     os.mkdir(dircluster)

                 except:

                     pass


                 rmsd_dict = {"AVERAGE_RMSD":

                              str(Clusters.get_cluster_label_average_rmsd(cl))}

                 clusstat = open(dircluster + "stat.out", "w")

                 for k, structure_name in enumerate(Clusters.get_cluster_label_names(cl)):


                     # extract the features

                     tmp_dict = {}

                     tmp_dict.update(rmsd_dict)

                     index = rmf_file_name_index_dict[structure_name]

                     for key in best_score_feature_keyword_list_dict:

                         tmp_dict[

                             key] = best_score_feature_keyword_list_dict[

                             key][

                             index]

                     # get the rmf name and the frame number from the list of

                     # frame names

                     rmf_name = structure_name.split("|")[0]

                     rmf_frame_number = int(structure_name.split("|")[1])


                     clusstat.write(str(tmp_dict) + "\n")

                     prot,rs = IMP.pmi.analysis.get_hier_and_restraints_from_rmf(

                         self.model,

                         rmf_frame_number,

                         rmf_name)

                     if not prot:

                         continue


                     if k > 0:

                         model_index = Clusters.get_model_index_from_name(

                             structure_name)

                         transformation = Clusters.get_transformation_to_first_member(

                             cl,

                             model_index)


                         rbs = set()

                         for p in IMP.atom.get_leaves(prot):

                             if not IMP.core.XYZR.get_is_setup(p):

                                 IMP.core.XYZR.setup_particle(p)

                                 IMP.core.XYZR(p).set_radius(0.0001)

                                 IMP.core.XYZR(p).set_coordinates((0, 0, 0))


                             if IMP.core.RigidBodyMember.get_is_setup(p):

                                 rb = IMP.core.RigidBodyMember(p).get_rigid_body()

                                 rbs.add(rb)

                             else:

                                 IMP.core.transform(IMP.core.XYZ(p),

                                                    transformation)

                         for rb in rbs:

                             IMP.core.transform(rb,transformation)


                     # add the density

                     if density_custom_ranges:

                         DensModule.add_subunits_density(prot)


                     # pdb writing should be optimized!

                     o = IMP.pmi.output.Output()

                     o.init_pdb(dircluster + str(k) + ".pdb", prot)

                     o.write_pdb(dircluster + str(k) + ".pdb")


                     o.init_rmf(dircluster + str(k) + ".rmf3", [prot],rs)

                     # IMP.rmf.add_restraints(o.dictionary_rmfs[dircluster+str(n)+".rmf3"],restraints)

                     o.write_rmf(dircluster + str(k) + ".rmf3")

                     o.close_rmf(dircluster + str(k) + ".rmf3")


                     del o

                     # IMP.atom.destroy(prot)


                 if density_custom_ranges:

                     DensModule.write_mrc(path=dircluster)

                     del DensModule


         if self.number_of_processes>1:

             self.comm.Barrier()


     def save_objects(self, objects, file_name):
         """Serialize `objects` to `file_name` with pickle.

         @param objects    The object to store; it is handed to pickle.dump()
                           unchanged, so it must be picklable
         @param file_name  Path of the file to write (overwritten if it exists)
         """
         import pickle

         # Pickle streams are byte streams: the file must be opened in binary
         # mode. Text mode fails outright on Python 3 and can corrupt the
         # stream through newline translation on some platforms.
         # The `with` block closes the file even if pickle.dump() raises.
         with open(file_name, 'wb') as outf:
             pickle.dump(objects, outf)


     def load_objects(self, file_name):
         """Read back an object previously pickled to `file_name`.

         @param file_name  Path of the pickle file to read
         @return The unpickled object, exactly as returned by pickle.load()
         """
         import pickle

         # NOTE: unpickling can execute arbitrary code; only use this on
         # files written by a trusted source (e.g. by save_objects()).
         # Binary mode is required: pickle streams are byte streams, and text
         # mode fails on Python 3. The `with` block closes the file even if
         # pickle.load() raises.
         with open(file_name, 'rb') as inputf:
             return pickle.load(inputf)

IMP::pmi.macros.AnalysisReplicaExchange0
A macro for running all the basic operations of analysis.
Definition: macros.py:969

IMP::core::RigidBodyMember
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:368

IMP::pmi.tools
Miscellaneous utilities.
Definition: tools.py:1

IMP::pmi.macros.AnalysisReplicaExchange0.clustering
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1067

IMP::core::get_leaves
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.

IMP::pmi.analysis.Clustering
A class to cluster structures.
Definition: pmi/Analysis.py:180

IMP::pmi.representation
Representation of the system.
Definition: representation.py:1

IMP::pmi.macros.AnalysisReplicaExchange0.get_modeling_trajectory
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:1014

IMP::core::XYZR::get_is_setup
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
Definition: XYZR.h:47

IMP::core::XYZR::setup_particle
static XYZR setup_particle(kernel::Model *m, ParticleIndex pi)
Definition: XYZR.h:48

IMP::pmi.macros.BuildModel1.build_model
def build_model
Create model.
Definition: macros.py:724

IMP::pmi.tools.scatter_and_gather
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:961

IMP::core::transform
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.

IMP::atom::get_by_type
Hierarchies get_by_type(Hierarchy mhd, GetByType t)

IMP::core::XYZ
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30

IMP::pmi.macros.BuildModel0
def BuildModel0
The macro constructs a component for each subunit (no splitting, nothing fancy). You can pass the resol...
Definition: macros.py:363

IMP::core::RigidBodyMember::get_is_setup
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
Definition: rigid_bodies.h:369

IMP::pmi.output.Output
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:19

IMP::pmi.macros.BuildModel
A macro to build a Representation based on a Topology and lists of movers.
Definition: macros.py:446

IMP::pmi.analysis
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1

IMP::pmi.output
Classes for writing output files and processing them.
Definition: output.py:1

IMP::pmi.samplers
Sampling of the system.
Definition: samplers.py:1

IMP::pmi.macros.BuildModel1
Deprecated building macro - use BuildModel()
Definition: macros.py:713

IMP::atom::get_leaves
Hierarchies get_leaves(const Selection &h)

IMP::pmi.analysis.GetModelDensity
A class to compute mean density maps from structures. Keeps a dictionary of density maps...
Definition: pmi/Analysis.py:922

IMP::rmf
Support for the RMF file format for storing hierarchical molecular data and markup.

IMP::pmi.tools.get_residue_indexes
def get_residue_indexes
This "overloaded" function retrieves the residue indexes for each particle which is an instance of Fr...
Definition: tools.py:920

IMP::pmi.macros.BuildModel.get_representation
def get_representation
Return the Representation object.
Definition: macros.py:569

IMP::core::XYZR
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27