doc/ref/macros_8py_source.html

 """@namespace IMP.pmi.macros

 Protocols for sampling structures and analyzing them.

 """


 import IMP

 import IMP.pmi.tools

 import IMP.pmi.samplers

 import IMP.pmi.output

 import IMP.pmi.analysis

 import IMP.pmi.io

 import IMP.pmi.alphabets

 import IMP.rmf

 import IMP.isd

 import IMP.pmi.dof

 import os

 from pathlib import Path

 import glob

 from operator import itemgetter

 from collections import defaultdict

 import numpy as np

 import itertools

 import warnings

 import math


 import pickle


 class _MockMPIValues:
     """Stand-in for samplers.MPI_values used when running in test mode.

     It only provides get_percentile(), which always reports the lowest
     possible percentile.
     """

     def get_percentile(self, name):
         """Return 0.0 whatever value `name` is asked for."""
         return 0.0


 class _RMFRestraints:
     """Sequence-like view of all restraints that go into the RMF file.

     The user-supplied restraints come first, followed by the members of
     the restraint set returned by
     IMP.pmi.tools.get_restraint_set(model, rmf=True).
     """

     def __init__(self, model, user_restraints):
         self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
         # A missing (or empty) user list is replaced by a new empty list
         self._user_restraints = user_restraints or []

     def __len__(self):
         n_user = len(self._user_restraints)
         n_rmf = self._rmf_rs.get_number_of_restraints()
         return n_user + n_rmf

     def __bool__(self):
         return len(self) != 0

     def __getitem__(self, i):
         # Members of the restraint set are handed out inside this small
         # adapter so that, like the user restraints, they can be queried
         # with get_restraint().
         class FakePMIWrapper:
             def __init__(self, r):
                 self.r = IMP.RestraintSet.get_from(r)

             def get_restraint(self):
                 return self.r

         n_user = len(self._user_restraints)
         if 0 <= i < n_user:
             return self._user_restraints[i]

         # Past the user restraints: index into the RMF restraint set.
         # Negative indices are not supported and fall through to the error.
         offset = i - n_user
         if 0 <= offset < self._rmf_rs.get_number_of_restraints():
             return FakePMIWrapper(self._rmf_rs.get_restraint(offset))

         raise IndexError("Out of range")


 class ReplicaExchange:

     """A macro to help setup and run replica exchange.

     Supports Monte Carlo and molecular dynamics.

     Produces trajectory RMF files, best PDB structures,

     and output stat files.

     """

     def __init__(self, model, root_hier,
                  monte_carlo_sample_objects=None,
                  molecular_dynamics_sample_objects=None,
                  output_objects=[],
                  rmf_output_objects=None,
                  monte_carlo_temperature=1.0,
                  simulated_annealing=False,
                  simulated_annealing_minimum_temperature=1.0,
                  simulated_annealing_maximum_temperature=2.5,
                  simulated_annealing_minimum_temperature_nframes=100,
                  simulated_annealing_maximum_temperature_nframes=100,
                  replica_exchange_minimum_temperature=1.0,
                  replica_exchange_maximum_temperature=2.5,
                  replica_exchange_swap=True,
                  num_sample_rounds=1,
                  number_of_best_scoring_models=500,
                  monte_carlo_steps=10,
                  self_adaptive=False,
                  molecular_dynamics_steps=10,
                  molecular_dynamics_max_time_step=1.0,
                  number_of_frames=1000,
                  save_coordinates_mode="lowest_temperature",
                  nframes_write_coordinates=1,
                  write_initial_rmf=True,
                  initial_rmf_name_suffix="initial",
                  stat_file_name_suffix="stat",
                  best_pdb_name_suffix="model",
                  mmcif=False,
                  do_clean_first=True,
                  do_create_directories=True,
                  global_output_directory="./",
                  rmf_dir="rmfs/",
                  best_pdb_dir="pdbs/",
                  replica_stat_file_suffix="stat_replica",
                  em_object_for_rmf=None,
                  atomistic=False,
                  replica_exchange_object=None,
                  test_mode=False,
                  score_moved=False,
                  use_nestor=False,
                  nestor_restraints=None,
                  nestor_rmf_fname_prefix="nested",):
         """Constructor.
            @param model The IMP model
            @param root_hier Top-level (System)hierarchy
            @param monte_carlo_sample_objects Objects for MC sampling, which
                   should generally be a simple list of Mover objects, e.g.
                   from DegreesOfFreedom.get_movers().
            @param molecular_dynamics_sample_objects Objects for MD sampling,
                   which should generally be a simple list of particles.
            @param output_objects A list of structural objects and restraints
                   that will be included in output (ie, statistics "stat"
                   files). Any object that provides a get_output() method
                   can be used here. If None is passed
                   the macro will not write stat files.
                   A non-empty list is stored as given and is appended to
                   by this class.
            @param rmf_output_objects A list of structural objects and
                   restraints that will be included in rmf. Any object
                   that provides a get_output() method can be used here.
                   The list is stored as given and is appended to by
                   this class.
            @param monte_carlo_temperature  MC temp (may need to be optimized
                   based on post-sampling analysis)
            @param simulated_annealing If True, perform simulated annealing
            @param simulated_annealing_minimum_temperature Should generally be
                   the same as monte_carlo_temperature.
            @param simulated_annealing_maximum_temperature Maximum temperature
                   passed to the samplers for simulated annealing.
            @param simulated_annealing_minimum_temperature_nframes Number of
                   frames to compute at minimum temperature.
            @param simulated_annealing_maximum_temperature_nframes Number of
                   frames to compute at
                   temps > simulated_annealing_maximum_temperature.
            @param replica_exchange_minimum_temperature Low temp for REX; should
                   generally be the same as monte_carlo_temperature.
            @param replica_exchange_maximum_temperature High temp for REX
            @param replica_exchange_swap Boolean, enable disable temperature
                   swap (Default=True)
            @param num_sample_rounds        Number of rounds of MC/MD per cycle
            @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
                   models to keep around for analysis.
            @param mmcif If True, write best scoring models in mmCIF format;
                   if False (the default), write in legacy PDB format.
            @param best_pdb_dir The directory under `global_output_directory`
                   where best-scoring PDB/mmCIF files are written.
            @param best_pdb_name_suffix Part of the file name for best-scoring
                   PDB/mmCIF files.
            @param monte_carlo_steps        Number of MC steps per round
            @param self_adaptive     self adaptive scheme for Monte Carlo movers
            @param molecular_dynamics_steps  Number of MD steps per round
            @param molecular_dynamics_max_time_step Max time step for MD
            @param number_of_frames         Number of REX frames to run
            @param save_coordinates_mode  string: how to save coordinates.
                   "lowest_temperature" (default) only the lowest temperatures
                   is saved
                   "25th_score" all replicas whose score is below the 25th
                   percentile
                   "50th_score" all replicas whose score is below the 50th
                   percentile
                   "75th_score" all replicas whose score is below the 75th
                   percentile
            @param nframes_write_coordinates How often to write the coordinates
                   of a frame
            @param write_initial_rmf        Write the initial configuration
            @param initial_rmf_name_suffix First part of the file name of the
                   initial RMF file, which is written in
                   `global_output_directory`.
            @param stat_file_name_suffix First part of the file name of the
                   stat files, which are written in
                   `global_output_directory`.
            @param do_clean_first Stored, but execute_macro() currently takes
                   no action on it.
            @param do_create_directories If True, execute_macro() creates the
                   output directories before writing to them.
            @param global_output_directory Folder that will be created to house
                   output.
            @param rmf_dir The directory under `global_output_directory`
                   where the production RMF files are written.
            @param replica_stat_file_suffix First part of the file name of the
                   replica stat files, which are written in
                   `global_output_directory`.
            @param em_object_for_rmf If given, an object whose
                   get_density_as_hierarchy() result is written to the RMF
                   files alongside `root_hier`.
            @param atomistic Passed through to IMP.pmi.output.Output.
            @param replica_exchange_object Passed through to
                   IMP.pmi.samplers.ReplicaExchange when the macro is run.
            @param test_mode Set to True to avoid writing any files, just test
                   one frame.
            @param score_moved   If True, attempt to speed up Monte Carlo
                   sampling by caching scoring function terms on particles
                   that didn't move.
            @param use_nestor   If True, follows the Nested Sampling workflow
                   of the NestOR module and skips writing stat files and
                   replica stat files.
            @param nestor_restraints   A list of restraints for which
                   likelihoods are to be computed for use by NestOR module.
            @param nestor_rmf_fname_prefix   Prefix to be used for storing .rmf3
                   files generated by NestOR .

            @exception TypeError if `root_hier` is not an IMP.atom.Hierarchy
                   without a parent.
            @exception ValueError if `save_coordinates_mode` is not one of
                   the values listed above.
         """
         self.model = model
         self.vars = {}

         # add check hierarchy is multistate
         if output_objects == []:
             # The "[]" in the default parameters is a global object, so make
             # our own copy here
             self.output_objects = []
         else:
             self.output_objects = output_objects
         self.rmf_output_objects = rmf_output_objects

         is_system_root = (isinstance(root_hier, IMP.atom.Hierarchy)
                           and not root_hier.get_parent())
         if not is_system_root:
             raise TypeError("Must provide System hierarchy (root_hier)")

         # The total score is always reported in whichever outputs are enabled
         if self.output_objects is not None:
             self.output_objects.append(
                 IMP.pmi.io.TotalScoreOutput(self.model))
         if self.rmf_output_objects is not None:
             self.rmf_output_objects.append(
                 IMP.pmi.io.TotalScoreOutput(self.model))

         self.root_hier = root_hier
         states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
         self.vars["number_of_states"] = len(states)
         self.is_multi_state = len(states) > 1
         if self.is_multi_state:
             # One hierarchy per state; only set for multi-state systems
             self.root_hiers = states

         self._rmf_restraints = _RMFRestraints(model, None)
         self.em_object_for_rmf = em_object_for_rmf
         self.monte_carlo_sample_objects = monte_carlo_sample_objects
         self.vars["self_adaptive"] = self_adaptive
         self.molecular_dynamics_sample_objects = \
             molecular_dynamics_sample_objects
         self.replica_exchange_object = replica_exchange_object
         self.molecular_dynamics_max_time_step = \
             molecular_dynamics_max_time_step
         self.vars["monte_carlo_temperature"] = monte_carlo_temperature
         self.vars["replica_exchange_minimum_temperature"] = \
             replica_exchange_minimum_temperature
         self.vars["replica_exchange_maximum_temperature"] = \
             replica_exchange_maximum_temperature
         self.vars["replica_exchange_swap"] = replica_exchange_swap
         self.vars["simulated_annealing"] = simulated_annealing
         self.vars["simulated_annealing_minimum_temperature"] = \
             simulated_annealing_minimum_temperature
         self.vars["simulated_annealing_maximum_temperature"] = \
             simulated_annealing_maximum_temperature
         self.vars["simulated_annealing_minimum_temperature_nframes"] = \
             simulated_annealing_minimum_temperature_nframes
         self.vars["simulated_annealing_maximum_temperature_nframes"] = \
             simulated_annealing_maximum_temperature_nframes

         self.vars["num_sample_rounds"] = num_sample_rounds
         self.vars[
             "number_of_best_scoring_models"] = number_of_best_scoring_models
         self.vars["monte_carlo_steps"] = monte_carlo_steps
         self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
         self.vars["number_of_frames"] = number_of_frames

         # ValueError is a subclass of Exception, so callers that catch
         # Exception continue to work.
         if save_coordinates_mode not in ("lowest_temperature", "25th_score",
                                          "50th_score", "75th_score"):
             raise ValueError("save_coordinates_mode has unrecognized value")
         self.vars["save_coordinates_mode"] = save_coordinates_mode

         self.vars["nframes_write_coordinates"] = nframes_write_coordinates
         self.vars["write_initial_rmf"] = write_initial_rmf
         self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
         self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
         self.vars["mmcif"] = mmcif
         self.vars["stat_file_name_suffix"] = stat_file_name_suffix
         self.vars["do_clean_first"] = do_clean_first
         self.vars["do_create_directories"] = do_create_directories
         self.vars["global_output_directory"] = global_output_directory
         self.vars["rmf_dir"] = rmf_dir
         self.vars["best_pdb_dir"] = best_pdb_dir
         self.vars["atomistic"] = atomistic
         self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
         # Filled in later by add_geometries()
         self.vars["geometries"] = None
         self.test_mode = test_mode
         self.score_moved = score_moved
         self.nest = use_nestor
         self.nestor_restraints = nestor_restraints
         self.nestor_rmf_fname = nestor_rmf_fname_prefix


     def add_geometries(self, geometries):

         if self.vars["geometries"] is None:

             self.vars["geometries"] = list(geometries)

         else:

             self.vars["geometries"].extend(geometries)


     def show_info(self):

         print("ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "

               "rmfs/*.rmf3 for each replica ")

         print("--- it stores the best scoring pdb models in pdbs/")

         print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the "

               "lowest temperature")

         print("--- variables:")

         keys = list(self.vars.keys())

         keys.sort()

         for v in keys:

             print("------", v.ljust(30), self.vars[v])

         print("Use nestor: ", self.nest)


     def get_replica_exchange_object(self):

         return self.replica_exchange_object


     def _add_provenance(self, sampler_md, sampler_mc):

         """Record details about the sampling in the IMP Hierarchies"""

         iterations = 0

         if sampler_md:

             method = "Molecular Dynamics"

             iterations += self.vars["molecular_dynamics_steps"]

         if sampler_mc:

             method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"

             iterations += self.vars["monte_carlo_steps"]

         # If no sampling is actually done, no provenance to write

         if iterations == 0 or self.vars["number_of_frames"] == 0:

             return

         iterations *= self.vars["num_sample_rounds"]


         pi = self.model.add_particle("sampling")

         p = IMP.core.SampleProvenance.setup_particle(

                 self.model, pi, method, self.vars["number_of_frames"],

                 iterations)

         p.set_number_of_replicas(

                 self.replica_exchange_object.get_number_of_replicas())

         IMP.pmi.tools._add_pmi_provenance(self.root_hier)

         IMP.core.add_provenance(self.model, self.root_hier, p)


     def execute_macro(self):

         temp_index_factor = 100000.0

         samplers = []

         sampler_mc = None

         sampler_md = None

         if self.monte_carlo_sample_objects is not None:

             print("Setting up MonteCarlo")

             sampler_mc = IMP.pmi.samplers.MonteCarlo(

                 self.model, self.monte_carlo_sample_objects,

                 self.vars["monte_carlo_temperature"],

                 score_moved=self.score_moved)

             if self.vars["simulated_annealing"]:

                 tmin = self.vars["simulated_annealing_minimum_temperature"]

                 tmax = self.vars["simulated_annealing_maximum_temperature"]

                 nfmin = self.vars[

                     "simulated_annealing_minimum_temperature_nframes"]

                 nfmax = self.vars[

                     "simulated_annealing_maximum_temperature_nframes"]

                 sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)

             if self.vars["self_adaptive"]:

                 sampler_mc.set_self_adaptive(

                     isselfadaptive=self.vars["self_adaptive"])

             if self.output_objects is not None:

                 self.output_objects.append(sampler_mc)

             if self.rmf_output_objects is not None:

                 self.rmf_output_objects.append(sampler_mc)

             samplers.append(sampler_mc)


         if self.molecular_dynamics_sample_objects is not None:

             print("Setting up MolecularDynamics")

             sampler_md = IMP.pmi.samplers.MolecularDynamics(

                 self.model, self.molecular_dynamics_sample_objects,

                 self.vars["monte_carlo_temperature"],

                 maximum_time_step=self.molecular_dynamics_max_time_step)

             if self.vars["simulated_annealing"]:

                 tmin = self.vars["simulated_annealing_minimum_temperature"]

                 tmax = self.vars["simulated_annealing_maximum_temperature"]

                 nfmin = self.vars[

                     "simulated_annealing_minimum_temperature_nframes"]

                 nfmax = self.vars[

                     "simulated_annealing_maximum_temperature_nframes"]

                 sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)

             if self.output_objects is not None:

                 self.output_objects.append(sampler_md)

             if self.rmf_output_objects is not None:

                 self.rmf_output_objects.append(sampler_md)

             samplers.append(sampler_md)

 # -------------------------------------------------------------------------


         print("Setting up ReplicaExchange")

         rex = IMP.pmi.samplers.ReplicaExchange(

             self.model, self.vars["replica_exchange_minimum_temperature"],

             self.vars["replica_exchange_maximum_temperature"], samplers,

             replica_exchange_object=self.replica_exchange_object)

         self.replica_exchange_object = rex.rem


         myindex = rex.get_my_index()

         if self.output_objects is not None:

             self.output_objects.append(rex)

         if self.rmf_output_objects is not None:

             self.rmf_output_objects.append(rex)

         # must reset the minimum temperature due to the

         # different binary length of rem.get_my_parameter double and python

         # float

         min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)


 # -------------------------------------------------------------------------


         globaldir = self.vars["global_output_directory"] + "/"

         rmf_dir = globaldir + self.vars["rmf_dir"]

         pdb_dir = globaldir + self.vars["best_pdb_dir"]


         if not self.test_mode and not self.nest:

             if self.vars["do_clean_first"]:

                 pass


             if self.vars["do_create_directories"]:


                 try:

                     os.makedirs(globaldir)

                 except:  # noqa: E722

                     pass

                 try:

                     os.makedirs(rmf_dir)

                 except:  # noqa: E722

                     pass


                 if not self.is_multi_state:

                     try:

                         os.makedirs(pdb_dir)

                     except:  # noqa: E722

                         pass

                 else:

                     for n in range(self.vars["number_of_states"]):

                         try:

                             os.makedirs(pdb_dir + "/" + str(n))

                         except:  # noqa: E722

                             pass


 # -------------------------------------------------------------------------


         sw = IMP.pmi.tools.Stopwatch()

         if self.output_objects is not None:

             self.output_objects.append(sw)

         if self.rmf_output_objects is not None:

             self.rmf_output_objects.append(sw)


         output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])


         if not self.nest:

             print("Setting up stat file")

             low_temp_stat_file = globaldir + \

                 self.vars["stat_file_name_suffix"] + "." + \

                 str(myindex) + ".out"


         # Ensure model is updated before saving init files

         if not self.test_mode:

             self.model.update()


         if not self.test_mode and not self.nest:

             if self.output_objects is not None:

                 output.init_stat2(low_temp_stat_file,

                                   self.output_objects,

                                   extralabels=["rmf_file", "rmf_frame_index"])

         else:

             print("Stat file writing is disabled")


         if self.rmf_output_objects is not None and not self.nest:

             print("Stat info being written in the rmf file")


         if not self.test_mode and not self.nest:

             print("Setting up replica stat file")

             replica_stat_file = globaldir + \

                 self.vars["replica_stat_file_suffix"] + "." + \

                 str(myindex) + ".out"

             if not self.test_mode:

                 output.init_stat2(replica_stat_file, [rex],

                                   extralabels=["score"])


             print("Setting up best pdb files")

             if not self.is_multi_state:

                 if self.vars["number_of_best_scoring_models"] > 0:

                     output.init_pdb_best_scoring(

                         pdb_dir + "/" + self.vars["best_pdb_name_suffix"],

                         self.root_hier,

                         self.vars["number_of_best_scoring_models"],

                         replica_exchange=True,

                         mmcif=self.vars['mmcif'],

                         best_score_file=globaldir + "best.scores.rex.py")

                     pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"

                     output.write_psf(

                         pdb_dir + "/" + "model.psf",

                         pdb_dir + "/" +

                         self.vars["best_pdb_name_suffix"] + pdbext)

             else:

                 if self.vars["number_of_best_scoring_models"] > 0:

                     for n in range(self.vars["number_of_states"]):

                         output.init_pdb_best_scoring(

                             pdb_dir + "/" + str(n) + "/" +

                             self.vars["best_pdb_name_suffix"],

                             self.root_hiers[n],

                             self.vars["number_of_best_scoring_models"],

                             replica_exchange=True,

                             mmcif=self.vars['mmcif'],

                             best_score_file=globaldir + "best.scores.rex.py")

                         pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"

                         output.write_psf(

                             pdb_dir + "/" + str(n) + "/" + "model.psf",

                             pdb_dir + "/" + str(n) + "/" +

                             self.vars["best_pdb_name_suffix"] + pdbext)

 # ---------------------------------------------


         if self.em_object_for_rmf is not None:

             output_hierarchies = [

                 self.root_hier,

                 self.em_object_for_rmf.get_density_as_hierarchy(

                 )]

         else:

             output_hierarchies = [self.root_hier]


         if not self.test_mode and not self.nest:

             print("Setting up and writing initial rmf coordinate file")

             init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]

             output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",

                             output_hierarchies,

                             listofobjects=self.rmf_output_objects)

             if self._rmf_restraints:

                 output.add_restraints_to_rmf(

                     init_suffix + "." + str(myindex) + ".rmf3",

                     self._rmf_restraints)

             output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")

             output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")


         if not self.test_mode:

             mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)

         else:

             mpivs = _MockMPIValues()


         self._add_provenance(sampler_md, sampler_mc)


         if not self.test_mode and not self.nest:

             print("Setting up production rmf files")

             rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"

             output.init_rmf(rmfname, output_hierarchies,

                             geometries=self.vars["geometries"],

                             listofobjects=self.rmf_output_objects)


             if self._rmf_restraints:

                 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)


         if not self.test_mode and self.nest:

             print("Setting up NestOR rmf files")

             nestor_rmf_fname = str(self.nestor_rmf_fname) + '_' + \

                 str(self.replica_exchange_object.get_my_index()) + '.rmf3'


             output.init_rmf(nestor_rmf_fname, output_hierarchies,

                             geometries=self.vars["geometries"],

                             listofobjects=self.rmf_output_objects)


         ntimes_at_low_temp = 0


         if myindex == 0 and not self.nest:

             self.show_info()

         self.replica_exchange_object.set_was_used(True)

         nframes = self.vars["number_of_frames"]

         if self.test_mode:

             nframes = 1


         sampled_likelihoods = []

         for i in range(nframes):

             if self.test_mode:

                 score = 0.

             else:

                 for nr in range(self.vars["num_sample_rounds"]):

                     if sampler_md is not None:

                         sampler_md.optimize(

                                   self.vars["molecular_dynamics_steps"])

                     if sampler_mc is not None:

                         sampler_mc.optimize(self.vars["monte_carlo_steps"])

                 score = IMP.pmi.tools.get_restraint_set(

                     self.model).evaluate(False)

                 mpivs.set_value("score", score)

             if not self.nest:

                 output.set_output_entry("score", score)


             my_temp_index = int(rex.get_my_temp() * temp_index_factor)


             if self.vars["save_coordinates_mode"] == "lowest_temperature":

                 save_frame = (min_temp_index == my_temp_index)

             elif self.vars["save_coordinates_mode"] == "25th_score":

                 score_perc = mpivs.get_percentile("score")

                 save_frame = (score_perc*100.0 <= 25.0)

             elif self.vars["save_coordinates_mode"] == "50th_score":

                 score_perc = mpivs.get_percentile("score")

                 save_frame = (score_perc*100.0 <= 50.0)

             elif self.vars["save_coordinates_mode"] == "75th_score":

                 score_perc = mpivs.get_percentile("score")

                 save_frame = (score_perc*100.0 <= 75.0)


             # Ensure model is updated before saving output files

             if save_frame and not self.test_mode:

                 self.model.update()


             if save_frame:

                 print("--- frame %s score %s " % (str(i), str(score)))


                 if self.nest:

                     if math.isnan(score):

                         sampled_likelihoods.append(math.nan)

                     else:

                         likelihood_for_sample = 1

                         for rstrnt in self.nestor_restraints:

                             likelihood_for_sample *= rstrnt.get_likelihood()

                         sampled_likelihoods.append(likelihood_for_sample)

                         output.write_rmf(nestor_rmf_fname)


                 if not self.test_mode and not self.nest:

                     if i % self.vars["nframes_write_coordinates"] == 0:

                         print('--- writing coordinates')

                         if self.vars["number_of_best_scoring_models"] > 0:

                             output.write_pdb_best_scoring(score)

                         output.write_rmf(rmfname)

                         output.set_output_entry("rmf_file", rmfname)

                         output.set_output_entry("rmf_frame_index",

                                                 ntimes_at_low_temp)

                     else:

                         output.set_output_entry("rmf_file", rmfname)

                         output.set_output_entry("rmf_frame_index", '-1')

                     if self.output_objects is not None:

                         output.write_stat2(low_temp_stat_file)

                 ntimes_at_low_temp += 1


             if not self.test_mode and not self.nest:

                 output.write_stat2(replica_stat_file)

             if self.vars["replica_exchange_swap"]:

                 rex.swap_temp(i, score)


         if self.nest and len(sampled_likelihoods) > 0:

             with open("likelihoods_"

                       + str(self.replica_exchange_object.get_my_index()),

                       "wb") as lif:

                 pickle.dump(sampled_likelihoods, lif)


             output.close_rmf(nestor_rmf_fname)


         for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):

             p.add_replica_exchange(state, self)


         if not self.test_mode and not self.nest:

             print("closing production rmf files")

             output.close_rmf(rmfname)


 class BuildSystem:

     """A macro to build a IMP::pmi::topology::System based on a

        TopologyReader object.


     Easily create multi-state systems by calling this macro

     repeatedly with different TopologyReader objects!

     A useful function is get_molecules() which returns the PMI Molecules

     grouped by state as a dictionary with key = (molecule name),

     value = IMP.pmi.topology.Molecule

     Quick multi-state system:

     @code{.python}

     model = IMP.Model()

     reader1 = IMP.pmi.topology.TopologyReader(tfile1)

     reader2 = IMP.pmi.topology.TopologyReader(tfile2)

     bs = IMP.pmi.macros.BuildSystem(model)

     bs.add_state(reader1)

     bs.add_state(reader2)

     bs.execute_macro() # build everything including degrees of freedom

     IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())

     ### now you have a two state system, you add restraints etc

     @endcode

     @note The "domain name" entry of the topology reader is not used.

     All molecules are set up by the component name, but split into rigid bodies

     as requested.

     """


     _alphabets = {'DNA': IMP.pmi.alphabets.dna,

                   'RNA': IMP.pmi.alphabets.rna}


     def __init__(self, model, sequence_connectivity_scale=4.0,
                  force_create_gmm_files=False, resolutions=[1, 10],
                  name='System'):
         """Constructor
         @param model An IMP Model
         @param sequence_connectivity_scale For scaling the connectivity
                restraint. It is not stored or used by the constructor
                itself.
         @param force_create_gmm_files If True, will sample and create GMMs
                   no matter what. If False, will only sample if the
                   files don't exist. If number of Gaussians is zero, won't
                   do anything.
         @param resolutions The resolutions to build for structured regions.
                A copy of the given sequence is stored.
         @param name The name of the top-level hierarchy node.
         """
         self.model = model
         self.system = IMP.pmi.topology.System(self.model, name=name)
         self._readers = []    # the TopologyReaders (one per state)
         # TempResidues for each domain: key=unique name,
         # value=(atomic_res,non_atomic_res).
         # NOTE(review): presumably a list holding one such dict per state;
         # confirm against add_state()
         self._domain_res = []
         # key = domain unique name, value = Component
         # NOTE(review): presumably also one dict per state; confirm
         self._domains = []
         self.force_create_gmm_files = force_create_gmm_files
         # Store a copy: the "[1, 10]" default is a single shared list, so
         # keeping a reference to it would let a change made through one
         # BuildSystem leak into every other instance using the default.
         self.resolutions = list(resolutions)


     def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
                   chain_ids=None):
         """Add a state using the topology info in a
            IMP::pmi::topology::TopologyReader object.

         A new state is created in the system. For every molecule listed
         by the reader, the first copy is created from its FASTA sequence
         and every further copy is cloned from that first molecule; each
         domain of each copy then gets a representation (beads, ideal
         helix, or PDB structure with optional Gaussian densities).
         When you are done adding states, call execute_macro()

         @param reader The TopologyReader object
         @param keep_chain_id If True, keep the chain IDs from the
                original PDB files, if available
         @param fasta_name_map dictionary for converting protein names
                found in the fasta file
         @param chain_ids A list or string of chain IDs for assigning to
                newly-created molecules, e.g.
                `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
                If not specified, chain IDs A through Z are assigned, then
                AA through AZ, then BA through BZ, and so on, in the same
                fashion as PDB.
         @return the newly created state
         """
         state = self.system.create_state()
         self._readers.append(reader)
         if chain_ids is None:
             chain_ids = IMP.pmi.output._ChainIDs()
         # Both dicts are keyed by the unique name of the domain
         residues_by_domain = {}     # value is (atomic res, nonatomic res)
         components_by_domain = {}   # value is _Component
         n_chains = 0

         # setup representation: loop over molecules, copies, then domains
         for molname in reader.get_molecules():
             copies = reader.get_molecules()[molname].domains
             for nc, copyname in enumerate(copies):
                 print("BuildSystem.add_state: setting up molecule %s copy "
                       "number %s" % (molname, str(nc)))
                 copy = copies[copyname]

                 # Pick the chain ID: the first chain named by a component
                 # (only when asked to keep PDB chain IDs), otherwise the
                 # next default ID.
                 pdb_chain = None
                 if keep_chain_id:
                     pdb_chain = next(
                         (c.chain for c in copy if c.chain is not None), None)
                 if pdb_chain is not None:
                     chain_id = pdb_chain
                 else:
                     chain_id = chain_ids[n_chains]
                     if keep_chain_id:
                         warnings.warn(
                             "No PDBs specified for %s, so keep_chain_id has "
                             "no effect; using default chain ID '%s'"
                             % (molname, chain_id), IMP.pmi.ParameterWarning)

                 if nc == 0:
                     # The first copy defines the molecule; its first
                     # component carries the FASTA information.
                     first = copy[0]
                     alphabet = self._alphabets.get(
                         first.fasta_flag, IMP.pmi.alphabets.amino_acid)
                     seqs = IMP.pmi.topology.Sequences(
                         first.fasta_file, fasta_name_map)
                     seq = seqs[first.fasta_id]
                     print("BuildSystem.add_state: molecule %s sequence has "
                           "%s residues" % (molname, len(seq)))
                     template_mol = state.create_molecule(
                         molname, seq, chain_id, alphabet=alphabet,
                         uniprot=seqs.uniprot.get(first.fasta_id))
                     mol = template_mol
                 else:
                     print("BuildSystem.add_state: creating a copy for "
                           "molecule %s" % molname)
                     mol = template_mol.create_copy(chain_id)
                 n_chains += 1

                 for domainnumber, domain in enumerate(copy):
                     print("BuildSystem.add_state: ---- setting up domain %s "
                           "of molecule %s" % (domainnumber, molname))
                     uname = domain.get_unique_name()
                     components_by_domain[uname] = domain

                     # we build everything in the residue range, even if it
                     # extends beyond what's in the actual PDB file
                     res_range = domain.residue_range
                     if res_range == [] or res_range is None:
                         domain_res = mol.get_residues()
                     else:
                         start = res_range[0] + domain.pdb_offset
                         if res_range[1] == 'END':
                             end = len(mol.sequence)
                         else:
                             end = res_range[1] + domain.pdb_offset
                         domain_res = mol.residue_range(start-1, end-1)
                         print("BuildSystem.add_state: -------- domain %s of "
                               "molecule %s extends from residue %s to "
                               "residue %s "
                               % (domainnumber, molname, start, end))

                     pdb_file = domain.pdb_file
                     if pdb_file == "BEADS":
                         print("BuildSystem.add_state: -------- domain %s of "
                               "molecule %s represented by BEADS "
                               % (domainnumber, molname))
                         as_densities = domain.em_residues_per_gaussian != 0
                         mol.add_representation(
                             domain_res,
                             resolutions=[domain.bead_size],
                             setup_particles_as_densities=as_densities,
                             color=domain.color)
                         atomic_res, bead_res = set(), domain_res
                     elif pdb_file == "IDEAL_HELIX":
                         print("BuildSystem.add_state: -------- domain %s of "
                               "molecule %s represented by IDEAL_HELIX "
                               % (domainnumber, molname))
                         mol.add_representation(
                             domain_res,
                             resolutions=self.resolutions,
                             ideal_helix=True,
                             density_residues_per_component=(
                                 domain.em_residues_per_gaussian),
                             density_prefix=domain.density_prefix,
                             density_force_compute=self.force_create_gmm_files,
                             color=domain.color)
                         atomic_res, bead_res = domain_res, set()
                     else:
                         print("BuildSystem.add_state: -------- domain %s of "
                               "molecule %s represented by pdb file %s "
                               % (domainnumber, molname, domain.pdb_file))
                         atomic_res = mol.add_structure(pdb_file,
                                                        domain.chain,
                                                        res_range,
                                                        domain.pdb_offset,
                                                        soft_check=True)
                         # whatever the PDB did not cover becomes beads
                         bead_res = domain_res - atomic_res

                         # Extra keyword arguments are only needed when
                         # Gaussians were requested for this domain.
                         emper = domain.em_residues_per_gaussian
                         if not emper:
                             atomic_extra = {}
                             bead_extra = {}
                         else:
                             print("BuildSystem.add_state: -------- domain %s "
                                   "of molecule %s represented by gaussians "
                                   % (domainnumber, molname))
                             atomic_extra = {
                                 'density_residues_per_component': emper,
                                 'density_prefix': domain.density_prefix,
                                 'density_force_compute':
                                     self.force_create_gmm_files}
                             bead_extra = {
                                 'setup_particles_as_densities': True}
                         mol.add_representation(
                             atomic_res, resolutions=self.resolutions,
                             color=domain.color, **atomic_extra)
                         if len(bead_res) > 0:
                             mol.add_representation(
                                 bead_res, resolutions=[domain.bead_size],
                                 color=domain.color, **bead_extra)

                     residues_by_domain[uname] = (atomic_res, bead_res)

         self._domain_res.append(residues_by_domain)
         self._domains.append(components_by_domain)
         print('BuildSystem.add_state: State', len(self.system.states), 'added')
         return state


     def get_molecules(self):
         """Return list of all molecules grouped by state.

         The list has one entry per state of the system, in the order
         given by the system's get_states(). Each entry is whatever that
         state's get_molecules() returns: a dictionary of Molecules where
         key is the molecule name.
         """
         molecules_by_state = []
         for state in self.system.get_states():
             molecules_by_state.append(state.get_molecules())
         return molecules_by_state


     def get_molecule(self, molname, copy_index=0, state_index=0):
         """Return a single molecule of the system.

         @param molname The molecule name, used as key into the chosen
                state's get_molecules() dictionary
         @param copy_index Index into the entries stored under that name
         @param state_index Index into the system's list of states
         """
         state = self.system.get_states()[state_index]
         copies = state.get_molecules()[molname]
         return copies[copy_index]


     def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04,
                       max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
         """Builds representations and sets up degrees of freedom

         The system is built first; then, for every state added with
         add_state(), the rigid bodies, flexible beads, super rigid bodies
         and chains of super rigid bodies listed by that state's
         TopologyReader are created.

         @param max_rb_trans Passed as max_trans to each rigid body
         @param max_rb_rot Passed as max_rot to each rigid body
         @param max_bead_trans Passed as nonrigid_max_trans to each rigid
                body and as max_trans to the flexible beads
         @param max_srb_trans Passed as max_trans to each super rigid body
         @param max_srb_rot Passed as max_rot to each super rigid body
         @return the tuple (root hierarchy, DegreesOfFreedom object); both
                 are also stored as self.root_hier and self.dof
         """
         print("BuildSystem.execute_macro: building representations")
         self.root_hier = self.system.build()

         print("BuildSystem.execute_macro: setting up degrees of freedom")
         self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
         for nstate, reader in enumerate(self._readers):
             rbs = reader.get_rigid_bodies()
             srbs = reader.get_super_rigid_bodies()
             csrbs = reader.get_chains_of_super_rigid_bodies()
             # per-state lookups filled in by add_state():
             # domain name -> (atomic residues, bead residues)
             state_res = self._domain_res[nstate]

             # add rigid bodies
             domains_in_rbs = set()
             for rblist in rbs:
                 print("BuildSystem.execute_macro: -------- building rigid "
                       "body %s" % (str(rblist)))
                 all_res = IMP.pmi.tools.OrderedSet()
                 bead_res = IMP.pmi.tools.OrderedSet()
                 # Remember the last domain explicitly instead of relying on
                 # the loop variable surviving the loop: with an empty list
                 # it would be undefined, or stale from the previous body.
                 last_dname = None
                 for dname in rblist:
                     print("BuildSystem.execute_macro: -------- adding %s"
                           % (str(dname)))
                     all_res |= state_res[dname][0]
                     bead_res |= state_res[dname][1]
                     domains_in_rbs.add(dname)
                     last_dname = dname
                 all_res |= bead_res
                 print("BuildSystem.execute_macro: -------- creating rigid "
                       "body with max_trans %s max_rot %s "
                       "non_rigid_max_trans %s"
                       % (str(max_rb_trans), str(max_rb_rot),
                          str(max_bead_trans)))
                 # The rigid body is named after its last domain
                 if last_dname is None:
                     rb_name = "RigidBody"
                 else:
                     rb_name = "RigidBody %s" % last_dname
                 self.dof.create_rigid_body(all_res,
                                            nonrigid_parts=bead_res,
                                            max_trans=max_rb_trans,
                                            max_rot=max_rb_rot,
                                            nonrigid_max_trans=max_bead_trans,
                                            name=rb_name)

             # if you have any domains not in an RB, set them as flexible beads
             for dname, domain in self._domains[nstate].items():
                 if dname not in domains_in_rbs:
                     if domain.pdb_file != "BEADS":
                         warnings.warn(
                             "No rigid bodies set for %s. Residues read from "
                             "the PDB file will not be sampled - only regions "
                             "missing from the PDB will be treated flexibly. "
                             "To sample the entire sequence, use BEADS instead "
                             "of a PDB file name" % dname,
                             IMP.pmi.StructureWarning)
                     # only the non-atomic (bead) residues are made flexible
                     self.dof.create_flexible_beads(
                             state_res[dname][1],
                             max_trans=max_bead_trans)

             # add super rigid bodies
             for srblist in srbs:
                 print("BuildSystem.execute_macro: -------- building "
                       "super rigid body %s" % (str(srblist)))
                 all_res = IMP.pmi.tools.OrderedSet()
                 for dname in srblist:
                     print("BuildSystem.execute_macro: -------- adding %s"
                           % (str(dname)))
                     all_res |= state_res[dname][0]
                     all_res |= state_res[dname][1]

                 print("BuildSystem.execute_macro: -------- creating super "
                       "rigid body with max_trans %s max_rot %s "
                       % (str(max_srb_trans), str(max_srb_rot)))
                 self.dof.create_super_rigid_body(
                     all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)

             # add chains of super rigid bodies
             for csrblist in csrbs:
                 all_res = IMP.pmi.tools.OrderedSet()
                 for dname in csrblist:
                     all_res |= state_res[dname][0]
                     all_res |= state_res[dname][1]
                 # the main chain mover gets the residues in index order
                 all_res = list(all_res)
                 all_res.sort(key=lambda r: r.get_index())
                 self.dof.create_main_chain_mover(all_res)
         return self.root_hier, self.dof


 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")

 class AnalysisReplicaExchange0:

     """A macro for running all the basic operations of analysis.

     Includes clustering, precision analysis, and making ensemble density maps.

     A number of plots are also supported.

     """

     def __init__(self, model,
                  merge_directories=["./"],
                  stat_file_name_suffix="stat",
                  best_pdb_name_suffix="model",
                  do_clean_first=True,
                  do_create_directories=True,
                  global_output_directory="output/",
                  replica_stat_file_suffix="stat_replica",
                  global_analysis_result_directory="./analysis/",
                  test_mode=False):
         """Constructor.

            Records the MPI rank and process count (a single process when
            mpi4py cannot be imported) and collects every file matching
            `stat.*.out` below `<directory>/<global_output_directory>` for
            each entry of merge_directories. A MissingFileWarning is issued
            for each directory in which no such file is found.

            @param model                           The IMP model
            @param merge_directories The directories containing output files
            @param stat_file_name_suffix           Not used by the constructor
            @param best_pdb_name_suffix            Not used by the constructor
            @param do_clean_first                  Not used by the constructor
            @param do_create_directories           Not used by the constructor
            @param global_output_directory          Where everything is
            @param replica_stat_file_suffix        Not used by the constructor
            @param global_analysis_result_directory
                                                   Not used by the constructor
            @param test_mode If True, nothing is changed on disk
         """
         try:
             from mpi4py import MPI
             self.comm = MPI.COMM_WORLD
             self.rank = self.comm.Get_rank()
             self.number_of_processes = self.comm.size
         except ImportError:
             # no MPI support available: act as the only process
             self.rank = 0
             self.number_of_processes = 1

         self.model = model
         self.test_mode = test_mode
         self.cluster_obj = None
         self._protocol_output = []

         # gather the stat files found under each of the root directories
         self.stat_files = []
         for rd in merge_directories:
             search_dir = os.path.join(rd, global_output_directory)
             found = glob.glob(os.path.join(search_dir, "stat.*.out"))
             if not found:
                 warnings.warn("no stat files found in %s" % search_dir,
                               IMP.pmi.MissingFileWarning)
             self.stat_files.extend(found)


     def add_protocol_output(self, p):
         """Capture details of the modeling protocol.

            The object is stored together with the value of its
            `_last_state` attribute at the time of the call.

            @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
         """
         # Assume last state is the one we're interested in
         entry = (p, p._last_state)
         self._protocol_output.append(entry)


     def get_modeling_trajectory(self,
                                 score_key="Total_Score",
                                 rmf_file_key="rmf_file",
                                 rmf_file_frame_key="rmf_frame_index",
                                 outputdir="./",
                                 get_every=1,
                                 nframes_trajectory=10000):
         """ Get a trajectory of the modeling run, for generating
         demonstrative movies

         Target scores are laid out between the highest and the lowest
         score found, decaying exponentially towards the lowest one. Each
         model is assigned to the target closest to its score and, per
         target, the model closest to it is kept. The selected scores and
         model indexes are printed; nothing is returned.

         @param score_key           The score for ranking models
         @param rmf_file_key        Key pointing to RMF filename
         @param rmf_file_frame_key  Key pointing to RMF frame number
         @param outputdir           The local output directory used in the run
                (not used by this method)
         @param get_every           Extract every nth frame
         @param nframes_trajectory  Total number of frames of the trajectory
         """
         trajectory_models = IMP.pmi.io.get_trajectory_models(
             self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
             get_every)
         scores = [float(raw) for raw in trajectory_models[2]]

         highest = max(scores)
         lowest = min(scores)
         spread = highest - lowest

         # one target score per requested frame
         targets = [spread * math.exp(-float(i)) + lowest
                    for i in range(nframes_trajectory)]
         picked_scores = [None] * nframes_trajectory
         picked_models = [-1] * nframes_trajectory

         for model_index, score in enumerate(scores):
             # first target with the smallest distance to this score
             slot = min(range(nframes_trajectory),
                        key=lambda j: abs(score - targets[j]))
             target = targets[slot]
             current = picked_scores[slot]
             # take an empty slot, or replace a model that is further away
             if current is None or abs(score - target) < abs(current - target):
                 picked_scores[slot] = score
                 picked_models[slot] = model_index

         print(picked_scores)
         print(picked_models)


     def _expand_ambiguity(self, prot, d):
         """If using PMI2, expand the dictionary to include copies as
            ambiguous options

            Entries whose key contains '..' or whose value is a tuple of
            three or more items are copied unchanged. For any other entry
            the value is taken as a molecule name; for each state in which
            that molecule selects more than one particle, one entry per
            copy is added with value (start, stop, name, copy, state).
            Otherwise the original entry is kept.

            This also keeps the states separate.

            @param prot The hierarchy searched for states and molecules
            @param d Dictionary to expand
            @return a new dictionary; d itself is not modified
         """
         expanded = {}
         for key, val in d.items():
             is_tuple = isinstance(val, tuple)
             if '..' in key or (is_tuple and len(val) >= 3):
                 # already explicit: pass through untouched
                 expanded[key] = val
                 continue

             states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
             if is_tuple:
                 # NOTE(review): only tuples shorter than 3 get here, so
                 # val[2] raises IndexError - confirm whether (start, stop,
                 # name) tuples were meant to be expanded too.
                 start, stop, name = val[0], val[1], val[2]
             else:
                 start, stop, name = 1, -1, val

             n_states = len(states)
             for nst in range(n_states):
                 sel = IMP.atom.Selection(prot, molecule=name, state_index=nst)
                 copies = sel.get_selected_particles(with_representation=False)
                 n_copies = len(copies)
                 if n_copies <= 1:
                     expanded[key] = val
                     continue
                 for nc in range(n_copies):
                     # the state index is part of the key only when there
                     # is more than one state
                     if n_states > 1:
                         label = '%s.%i..%i' % (name, nst, nc)
                     else:
                         label = '%s..%i' % (name, nc)
                     expanded[label] = (start, stop, name, nc, nst)
         return expanded


     def clustering(self,

                    score_key="Total_Score",

                    rmf_file_key="rmf_file",

                    rmf_file_frame_key="rmf_frame_index",

                    state_number=0,

                    prefiltervalue=None,

                    feature_keys=[],

                    outputdir="./",

                    alignment_components=None,

                    number_of_best_scoring_models=10,

                    rmsd_calculation_components=None,

                    distance_matrix_file='distances.mat',

                    load_distance_matrix_file=False,

                    skip_clustering=False,

                    number_of_clusters=1,

                    display_plot=False,

                    exit_after_display=True,

                    get_every=1,

                    first_and_last_frames=None,

                    density_custom_ranges=None,

                    write_pdb_with_centered_coordinates=False,

                    voxel_size=5.0):

         """Get the best scoring models, compute a distance matrix,

            cluster them, and create density maps.


         Tuple format: "molname" just the molecule,

         or (start,stop,molname,copy_num(optional),state_num(optional)

         Can pass None for copy or state to ignore that field.

         If you don't pass a specific copy number


         @param score_key              The score for ranking models.

         @param rmf_file_key           Key pointing to RMF filename

         @param rmf_file_frame_key     Key pointing to RMF frame number

         @param state_number           State number to analyze

         @param prefiltervalue         Only include frames where the

                score key is below this value

         @param feature_keys           Keywords for which you want to

                calculate average, medians, etc.

                If you pass "Keyname" it'll include everything that matches

                "*Keyname*"

         @param outputdir               The local output directory used in

                the run

         @param alignment_components    Dictionary with keys=groupname,

                values are tuples for aligning the structures

                e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}

         @param number_of_best_scoring_models  Num models to keep per run

         @param rmsd_calculation_components    For calculating RMSD

                (same format as alignment_components)

         @param distance_matrix_file           Where to store/read the

                distance matrix

         @param load_distance_matrix_file      Try to load the distance

                matrix file

         @param skip_clustering                Just extract the best scoring

                models and save the pdbs

         @param number_of_clusters             Number of k-means clusters

         @param display_plot                   Display the distance matrix

         @param exit_after_display             Exit after displaying distance

                matrix

         @param get_every                      Extract every nth frame

         @param first_and_last_frames          A tuple with the first and last

                frames to be analyzed. Values are percentages!

                Default: get all frames

         @param density_custom_ranges          For density calculation

                (same format as alignment_components)

         @param write_pdb_with_centered_coordinates

         @param voxel_size                     Used for the density output

         """

         # Track provenance information to be added to each output model

         prov = []

         self._outputdir = Path(outputdir).absolute()

         self._number_of_clusters = number_of_clusters

         for p, state in self._protocol_output:

             p.add_replica_exchange_analysis(state, self, density_custom_ranges)


         if self.test_mode:

             return


         if self.rank == 0:

             try:

                 os.mkdir(outputdir)

             except:  # noqa: E722

                 pass


         if not load_distance_matrix_file:

             if len(self.stat_files) == 0:

                 print("ERROR: no stat file found in the given path")

                 return

             my_stat_files = IMP.pmi.tools.chunk_list_into_segments(

                 self.stat_files, self.number_of_processes)[self.rank]


             # read ahead to check if you need the PMI2 score key instead

             for k in (score_key, rmf_file_key, rmf_file_frame_key):

                 if k in feature_keys:

                     warnings.warn(

                             "no need to pass " + k + " to feature_keys.",

                             IMP.pmi.ParameterWarning)

                     feature_keys.remove(k)


             best_models = IMP.pmi.io.get_best_models(

                 my_stat_files, score_key, feature_keys, rmf_file_key,

                 rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)

             rmf_file_list = best_models[0]

             rmf_file_frame_list = best_models[1]

             score_list = best_models[2]

             feature_keyword_list_dict = best_models[3]


 # ------------------------------------------------------------------------

 # collect all the files and scores

 # ------------------------------------------------------------------------


             if self.number_of_processes > 1:

                 score_list = IMP.pmi.tools.scatter_and_gather(score_list)

                 rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)

                 rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(

                     rmf_file_frame_list)

                 for k in feature_keyword_list_dict:

                     feature_keyword_list_dict[k] = \

                         IMP.pmi.tools.scatter_and_gather(

                             feature_keyword_list_dict[k])


             # sort by score and get the best scoring ones

             score_rmf_tuples = list(zip(score_list,

                                         rmf_file_list,

                                         rmf_file_frame_list,

                                         list(range(len(score_list)))))


             if density_custom_ranges:

                 for k in density_custom_ranges:

                     if not isinstance(density_custom_ranges[k], list):

                         raise Exception("Density custom ranges: values must "

                                         "be lists of tuples")


             # keep subset of frames if requested

             if first_and_last_frames is not None:

                 nframes = len(score_rmf_tuples)

                 first_frame = int(first_and_last_frames[0] * nframes)

                 last_frame = int(first_and_last_frames[1] * nframes)

                 if last_frame > len(score_rmf_tuples):

                     last_frame = -1

                 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]


             # sort RMFs by the score_key in ascending order, and store the rank

             best_score_rmf_tuples = sorted(

                 score_rmf_tuples,

                 key=lambda x: float(x[0]))[:number_of_best_scoring_models]

             best_score_rmf_tuples = [t+(n,) for n, t in

                                      enumerate(best_score_rmf_tuples)]

             # Note in the provenance info that we only kept best-scoring models

             prov.append(IMP.pmi.io.FilterProvenance(

                 "Best scoring", 0, number_of_best_scoring_models))

             # sort the feature scores in the same way

             best_score_feature_keyword_list_dict = defaultdict(list)

             for tpl in best_score_rmf_tuples:

                 index = tpl[3]

                 for f in feature_keyword_list_dict:

                     best_score_feature_keyword_list_dict[f].append(

                         feature_keyword_list_dict[f][index])

             my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(

                 best_score_rmf_tuples,

                 self.number_of_processes)[self.rank]


             # expand the dictionaries to include ambiguous copies

             prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(

                 self.model, 0, my_best_score_rmf_tuples[0][1])[0]

             if rmsd_calculation_components is not None:

                 tmp = self._expand_ambiguity(

                     prot_ahead, rmsd_calculation_components)

                 if tmp != rmsd_calculation_components:

                     print('Detected ambiguity, expand rmsd components to',

                           tmp)

                     rmsd_calculation_components = tmp

             if alignment_components is not None:

                 tmp = self._expand_ambiguity(prot_ahead,

                                              alignment_components)

                 if tmp != alignment_components:

                     print('Detected ambiguity, expand alignment '

                           'components to', tmp)

                     alignment_components = tmp


 # -------------------------------------------------------------

 # read the coordinates

 # ------------------------------------------------------------

             rmsd_weights = IMP.pmi.io.get_bead_sizes(

                 self.model, my_best_score_rmf_tuples[0],

                 rmsd_calculation_components, state_number=state_number)

             got_coords = IMP.pmi.io.read_coordinates_of_rmfs(

                 self.model, my_best_score_rmf_tuples, alignment_components,

                 rmsd_calculation_components, state_number=state_number)


             # note! the coordinates are simply float tuples, NOT decorators,

             # NOT Vector3D, NOR particles, because these object cannot be

             # serialized. We need serialization

             # for the parallel computation based on mpi.


             # dict:key=component name,val=coords per hit

             all_coordinates = got_coords[0]


             # same as above, limited to alignment bits

             alignment_coordinates = got_coords[1]


             # same as above, limited to RMSD bits

             rmsd_coordinates = got_coords[2]


             # dictionary with key=RMF, value=score rank

             rmf_file_name_index_dict = got_coords[3]


             # RMF file per hit

             all_rmf_file_names = got_coords[4]


 # ------------------------------------------------------------------------

 # optionally don't compute distance matrix or cluster, just write top files

 # ------------------------------------------------------------------------

             if skip_clustering:

                 if density_custom_ranges:

                     DensModule = IMP.pmi.analysis.GetModelDensity(

                         density_custom_ranges, voxel=voxel_size)


                 dircluster = os.path.join(outputdir,

                                           "all_models."+str(self.rank))

                 try:

                     os.mkdir(outputdir)

                 except:  # noqa: E722

                     pass

                 try:

                     os.mkdir(dircluster)

                 except:  # noqa: E722

                     pass

                 clusstat = open(os.path.join(

                     dircluster, "stat."+str(self.rank)+".out"), "w")

                 for cnt, tpl in enumerate(my_best_score_rmf_tuples):

                     rmf_name = tpl[1]

                     rmf_frame_number = tpl[2]

                     tmp_dict = {}

                     index = tpl[4]

                     for key in best_score_feature_keyword_list_dict:

                         tmp_dict[key] = \

                             best_score_feature_keyword_list_dict[key][index]


                     if cnt == 0:

                         prots, rs = \

                             IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(

                                 self.model, rmf_frame_number, rmf_name)

                     else:

                         linking_successful = \

                             IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(

                                 self.model, prots, rs, rmf_frame_number,

                                 rmf_name)

                         if not linking_successful:

                             continue


                     if not prots:

                         continue


                     states = IMP.atom.get_by_type(

                         prots[0], IMP.atom.STATE_TYPE)

                     prot = states[state_number]


                     # get transformation aligning coordinates of

                     # requested tuples to the first RMF file

                     if cnt == 0:

                         coords_f1 = alignment_coordinates[cnt]

                     if cnt > 0:

                         coords_f2 = alignment_coordinates[cnt]

                         if coords_f2:

                             Ali = IMP.pmi.analysis.Alignment(

                                 coords_f1, coords_f2)

                             transformation = Ali.align()[1]

                         else:

                             transformation = \

                                 IMP.algebra.get_identity_transformation_3d()


                         rbs = set()

                         for p in IMP.atom.get_leaves(prot):

                             if not IMP.core.XYZR.get_is_setup(p):

                                 IMP.core.XYZR.setup_particle(p)

                                 IMP.core.XYZR(p).set_radius(0.0001)

                                 IMP.core.XYZR(p).set_coordinates((0, 0, 0))


                             if IMP.core.RigidBodyMember.get_is_setup(p):

                                 rbm = IMP.core.RigidBodyMember(p)

                                 rb = rbm.get_rigid_body()

                                 rbs.add(rb)

                             else:

                                 IMP.core.transform(IMP.core.XYZ(p),

                                                    transformation)

                         for rb in rbs:

                             IMP.core.transform(rb, transformation)


                     o = IMP.pmi.output.Output()

                     self.model.update()

                     out_pdb_fn = os.path.join(

                         dircluster, str(cnt)+"."+str(self.rank)+".pdb")

                     out_rmf_fn = os.path.join(

                         dircluster, str(cnt)+"."+str(self.rank)+".rmf3")

                     o.init_pdb(out_pdb_fn, prot)

                     tc = write_pdb_with_centered_coordinates

                     o.write_pdb(out_pdb_fn,

                                 translate_to_geometric_center=tc)


                     tmp_dict["local_pdb_file_name"] = \

                         os.path.basename(out_pdb_fn)

                     tmp_dict["rmf_file_full_path"] = rmf_name

                     tmp_dict["local_rmf_file_name"] = \

                         os.path.basename(out_rmf_fn)

                     tmp_dict["local_rmf_frame_number"] = 0


                     clusstat.write(str(tmp_dict)+"\n")


                     # create a single-state System and write that

                     h = IMP.atom.Hierarchy.setup_particle(

                         IMP.Particle(self.model))

                     h.set_name("System")

                     h.add_child(prot)

                     o.init_rmf(out_rmf_fn, [h], rs)


                     o.write_rmf(out_rmf_fn)

                     o.close_rmf(out_rmf_fn)

                     # add the density

                     if density_custom_ranges:

                         DensModule.add_subunits_density(prot)


                 if density_custom_ranges:

                     DensModule.write_mrc(path=dircluster)

                     del DensModule

                 return


             # broadcast the coordinates

             if self.number_of_processes > 1:

                 all_coordinates = IMP.pmi.tools.scatter_and_gather(

                     all_coordinates)

                 all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(

                     all_rmf_file_names)

                 rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(

                     rmf_file_name_index_dict)

                 alignment_coordinates = IMP.pmi.tools.scatter_and_gather(

                     alignment_coordinates)

                 rmsd_coordinates = IMP.pmi.tools.scatter_and_gather(

                     rmsd_coordinates)


             if self.rank == 0:

                 # save needed information in external files

                 self.save_objects(

                     [best_score_feature_keyword_list_dict,

                      rmf_file_name_index_dict],

                     ".macro.pkl")


 # ------------------------------------------------------------------------

 # Calculate distance matrix and cluster

 # ------------------------------------------------------------------------

             print("setup clustering class")

             self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)


             for n, model_coordinate_dict in enumerate(all_coordinates):

                 # let's try to align

                 if (alignment_components is not None

                         and len(self.cluster_obj.all_coords) == 0):

                     # set the first model as template coordinates

                     self.cluster_obj.set_template(alignment_coordinates[n])

                 self.cluster_obj.fill(all_rmf_file_names[n],

                                       rmsd_coordinates[n])

             print("Global calculating the distance matrix")


             # calculate distance matrix, all against all

             self.cluster_obj.dist_matrix()


             # perform clustering and optionally display

             if self.rank == 0:

                 self.cluster_obj.do_cluster(number_of_clusters)

                 if display_plot:

                     if self.rank == 0:

                         self.cluster_obj.plot_matrix(

                             figurename=os.path.join(outputdir,

                                                     'dist_matrix.pdf'))

                     if exit_after_display:

                         exit()

                 self.cluster_obj.save_distance_matrix_file(

                     file_name=distance_matrix_file)


 # ------------------------------------------------------------------------

 # Alternatively, load the distance matrix from file and cluster that

 # ------------------------------------------------------------------------

         else:

             if self.rank == 0:

                 print("setup clustering class")

                 self.cluster_obj = IMP.pmi.analysis.Clustering()

                 self.cluster_obj.load_distance_matrix_file(

                     file_name=distance_matrix_file)

                 print("clustering with %s clusters" % str(number_of_clusters))

                 self.cluster_obj.do_cluster(number_of_clusters)

                 [best_score_feature_keyword_list_dict,

                  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")

                 if display_plot:

                     if self.rank == 0:

                         self.cluster_obj.plot_matrix(figurename=os.path.join(

                             outputdir, 'dist_matrix.pdf'))

                     if exit_after_display:

                         exit()

         if self.number_of_processes > 1:

             self.comm.Barrier()


 # ------------------------------------------------------------------------

 # now save all information about the clusters

 # ------------------------------------------------------------------------


         if self.rank == 0:

             print(self.cluster_obj.get_cluster_labels())

             for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):

                 print("rank %s " % str(self.rank))

                 print("cluster %s " % str(n))

                 print("cluster label %s " % str(cl))

                 print(self.cluster_obj.get_cluster_label_names(cl))

                 cluster_size = \

                     len(self.cluster_obj.get_cluster_label_names(cl))

                 cluster_prov = \

                     prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]


                 # first initialize the Density class if requested

                 if density_custom_ranges:

                     DensModule = IMP.pmi.analysis.GetModelDensity(

                         density_custom_ranges,

                         voxel=voxel_size)


                 dircluster = outputdir + "/cluster." + str(n) + "/"

                 try:

                     os.mkdir(dircluster)

                 except:   # noqa: E722

                     pass


                 rmsd_dict = {

                     "AVERAGE_RMSD":

                     str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}

                 clusstat = open(dircluster + "stat.out", "w")

                 for k, structure_name in enumerate(

                         self.cluster_obj.get_cluster_label_names(cl)):

                     # extract the features

                     tmp_dict = {}

                     tmp_dict.update(rmsd_dict)

                     index = rmf_file_name_index_dict[structure_name]

                     for key in best_score_feature_keyword_list_dict:

                         tmp_dict[

                             key] = best_score_feature_keyword_list_dict[

                             key][

                             index]


                     # get the rmf name and the frame number from the list of

                     # frame names

                     rmf_name = structure_name.split("|")[0]

                     rmf_frame_number = int(structure_name.split("|")[1])

                     clusstat.write(str(tmp_dict) + "\n")


                     # extract frame (open or link to existing)

                     if k == 0:

                         prots, rs = \

                             IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(

                                 self.model, rmf_frame_number, rmf_name)

                     else:

                         linking_successful = \

                             IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(

                                 self.model, prots, rs, rmf_frame_number,

                                 rmf_name)

                         if not linking_successful:

                             continue

                     if not prots:

                         continue


                     states = IMP.atom.get_by_type(

                         prots[0], IMP.atom.STATE_TYPE)

                     prot = states[state_number]

                     if k == 0:

                         IMP.pmi.io.add_provenance(cluster_prov, (prot,))


                     # transform clusters onto first

                     if k > 0:

                         co = self.cluster_obj

                         model_index = co.get_model_index_from_name(

                             structure_name)

                         transformation = co.get_transformation_to_first_member(

                             cl, model_index)

                         rbs = set()

                         for p in IMP.atom.get_leaves(prot):

                             if not IMP.core.XYZR.get_is_setup(p):

                                 IMP.core.XYZR.setup_particle(p)

                                 IMP.core.XYZR(p).set_radius(0.0001)

                                 IMP.core.XYZR(p).set_coordinates((0, 0, 0))


                             if IMP.core.RigidBodyMember.get_is_setup(p):

                                 rbm = IMP.core.RigidBodyMember(p)

                                 rb = rbm.get_rigid_body()

                                 rbs.add(rb)

                             else:

                                 IMP.core.transform(IMP.core.XYZ(p),

                                                    transformation)

                         for rb in rbs:

                             IMP.core.transform(rb, transformation)


                     # add the density

                     if density_custom_ranges:

                         DensModule.add_subunits_density(prot)


                     # pdb writing should be optimized!

                     o = IMP.pmi.output.Output()

                     self.model.update()

                     o.init_pdb(dircluster + str(k) + ".pdb", prot)

                     o.write_pdb(dircluster + str(k) + ".pdb")


                     # create a single-state System and write that

                     h = IMP.atom.Hierarchy.setup_particle(

                         IMP.Particle(self.model))

                     h.set_name("System")

                     h.add_child(prot)

                     o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)

                     o.write_rmf(dircluster + str(k) + ".rmf3")

                     o.close_rmf(dircluster + str(k) + ".rmf3")


                     del o

                     # IMP.atom.destroy(prot)


                 if density_custom_ranges:

                     DensModule.write_mrc(path=dircluster)

                     del DensModule


         if self.number_of_processes > 1:

             self.comm.Barrier()


     def get_cluster_rmsd(self, cluster_num):
         """
         Return the average rmsd of one cluster.
         @param cluster_num cluster label, forwarded to
                get_cluster_label_average_rmsd() of the clustering object
         Raises an Exception if no clustering object is available yet.
         """
         clustering = self.cluster_obj
         if clustering is not None:
             return clustering.get_cluster_label_average_rmsd(cluster_num)
         raise Exception("Run clustering first")


     def save_objects(self, objects, file_name):
         """
         Pickle `objects` into a binary file.
         @param objects any picklable object
         @param file_name path of the file to create or overwrite
         """
         import pickle
         # the context manager closes the file even if pickling fails
         with open(file_name, 'wb') as outf:
             pickle.dump(objects, outf)


     def load_objects(self, file_name):
         """
         Read back objects from a pickle file.
         @param file_name path of a pickle file
         @return the unpickled object
         """
         import pickle
         # NOTE: unpickling can execute arbitrary code; only load files that
         # come from a trusted source.
         # The context manager closes the file even if unpickling fails.
         with open(file_name, 'rb') as inputf:
             return pickle.load(inputf)


 class AnalysisReplicaExchange:


     """

     This class contains analysis utilities to investigate ReplicaExchange

     results.

     """


     ########################

     # Construction and Setup

     ########################


     def __init__(self, model, stat_files, best_models=None, score_key=None,
                  alignment=True):
         """
         Construction of the Class.
         @param model IMP.Model()
         @param stat_files list of string. Can be ascii stat files,
                rmf files names
         @param best_models Integer. Number of best scoring models,
                if None: all models will be read
         @param score_key Use the provided stat key keyword as the score
                (by default, the total score is used)
         @param alignment boolean (Default=True). Align before computing
                the rmsd.
         """

         self.model = model
         self.best_models = best_models
         # Two hierarchy handlers are kept: stath0 reads the stat files and
         # stath1 is constructed from stath0. Other methods of this class
         # load one frame in each handler before calling self.rmsd().
         self.stath0 = IMP.pmi.output.StatHierarchyHandler(
             model, stat_files, self.best_models, score_key, cache=True)
         self.stath1 = IMP.pmi.output.StatHierarchyHandler(
             StatHierarchyHandler=self.stath0)

         # rigid bodies and beads of each handler, at all resolutions
         self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(
             IMP.pmi.tools.select_at_all_resolutions(self.stath1))
         self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(
             IMP.pmi.tools.select_at_all_resolutions(self.stath0))
         # by default both the rmsd and the alignment selections are built
         # without any Selection keyword; see set_rmsd_selection() and
         # set_alignment_selection() to restrict them
         self.sel0_rmsd = IMP.atom.Selection(self.stath0)
         self.sel1_rmsd = IMP.atom.Selection(self.stath1)
         self.sel0_alignment = IMP.atom.Selection(self.stath0)
         self.sel1_alignment = IMP.atom.Selection(self.stath1)
         self.clusters = []
         # fill the cluster list with a single cluster containing all models
         c = IMP.pmi.output.Cluster(0)
         self.clusters.append(c)
         for n0 in range(len(self.stath0)):
             c.add_member(n0)
         # presumably caches filled by rmsd(); confirm against that method
         self.pairwise_rmsd = {}
         self.pairwise_molecular_assignment = {}
         self.alignment = alignment
         # molecule names registered through set_symmetric()
         self.symmetric_molecules = {}
         self.issymmetricsel = {}
         # update_seldicts() is called only once the attributes above exist
         self.update_seldicts()
         self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
             IMP.atom.get_leaves(self.stath0))
         self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
             IMP.atom.get_leaves(self.stath1))


     def set_rmsd_selection(self, **kwargs):
         """
         Define the selection on which the rmsd is computed
         @param kwargs keywords forwarded to IMP.atom.Selection
         """
         # one selection per hierarchy handler, stath0 first
         for attr_name, handler in (("sel0_rmsd", self.stath0),
                                    ("sel1_rmsd", self.stath1)):
             setattr(self, attr_name, IMP.atom.Selection(handler, **kwargs))
         self.update_seldicts()


     def set_symmetric(self, molecule_name):
         """
         Register a molecule as symmetric
         @param molecule_name name of the molecule; it is stored as a key
                of symmetric_molecules with value 0
         """
         flagged = self.symmetric_molecules
         flagged[molecule_name] = 0
         # refresh the selection dictionaries with the new molecule
         self.update_seldicts()


     def set_alignment_selection(self, **kwargs):
         """
         Define the selection on which the alignment is computed
         @param kwargs keywords forwarded to IMP.atom.Selection
         """
         # one selection per hierarchy handler, stath0 first
         for attr_name, handler in (("sel0_alignment", self.stath0),
                                    ("sel1_alignment", self.stath1)):
             setattr(self, attr_name, IMP.atom.Selection(handler, **kwargs))


     ######################

     # Clustering functions

     ######################

     def clean_clusters(self):
         """
         Forget all current clusters by resetting the cluster list.
         """
         # Rebinding the attribute is sufficient: a cluster is released once
         # nothing references it any more, whereas `del` applied to a loop
         # variable would only unbind that name and free nothing.
         self.clusters = []


     def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
         """
         Cluster the models based on RMSD.
         @param rmsd_cutoff Float the distance cutoff in Angstrom
         @param metric (Default=IMP.atom.get_rmsd) the metric that will
                be used to compute rmsds
         """
         self.clean_clusters()
         unassigned = set(range(len(self.stath1)))
         # aggregate() is handed the set of pending model indexes; the loop
         # ends once that set has been emptied (presumably by aggregate()
         # itself, which is defined elsewhere)
         while unassigned:
             self.aggregate(unassigned, rmsd_cutoff, metric)
         self.update_clusters()


     def refine(self, rmsd_cutoff=10):
         """
         Refine the clusters by merging the ones whose centers are close
         @param rmsd_cutoff cutoff distance in Angstroms

         Every pair of clusters is visited once. When the rmsd between the
         two centers is within the cutoff, the second cluster is merged
         into the first one and removed from the cluster list. A cluster
         that has been merged away takes no part in later comparisons.
         """
         # ids of the clusters that were merged into another cluster
         absorbed = set()
         # combinations() takes a snapshot of the list, so all clusters stay
         # alive (and their ids unique) for the duration of the loop
         for c0, c1 in itertools.combinations(self.clusters, 2):
             # Merging into an absorbed cluster would drop models from the
             # cluster list; merging an absorbed cluster a second time
             # would duplicate its members.
             if id(c0) in absorbed or id(c1) in absorbed:
                 continue
             if c0.center_index is None:
                 self.compute_cluster_center(c0)
             if c1.center_index is None:
                 self.compute_cluster_center(c1)
             # load the two centers, one in each handler, before rmsd()
             _ = self.stath0[c0.center_index]
             _ = self.stath1[c1.center_index]
             rmsd, _ = self.rmsd()
             if rmsd <= rmsd_cutoff:
                 absorbed.add(id(c1))
                 c0 += c1
         # drop the absorbed clusters, by identity, keeping the same list
         self.clusters[:] = [c for c in self.clusters
                             if id(c) not in absorbed]
         self.update_clusters()


     ####################

     # Input Output

     ####################


     def set_cluster_assignments(self, cluster_ids):
         """
         Rebuild the clusters from an explicit per-frame assignment
         @param cluster_ids sequence holding one cluster id per frame of
                stath0; frame i is added to the cluster with id
                cluster_ids[i]. The ids do not need to be contiguous
                or to start at zero.
         Raises ValueError if the number of ids differs from the number
         of frames.
         """
         if len(cluster_ids) != len(self.stath0):
             raise ValueError('cluster ids has to be same length as '
                              'number of frames')

         # Clusters are looked up by id, not by position in the list, so
         # that labels such as [1, 1, 5] end up in the matching cluster.
         cluster_by_id = {}
         for cid in sorted(set(cluster_ids)):
             cluster_by_id[cid] = IMP.pmi.output.Cluster(cid)
         # same order as before: one cluster per distinct id, sorted by id
         self.clusters = list(cluster_by_id.values())
         for i, (cid, d) in enumerate(zip(cluster_ids, self.stath0)):
             cluster_by_id[cid].add_member(i, d)


     def get_cluster_data(self, cluster):
         """
         Collect the model data stored in a cluster
         @param cluster IMP.pmi.output.Cluster object
         @return list with one entry per item obtained by iterating
                 over the cluster
         """
         return [entry for entry in cluster]


     def save_data(self, filename='data.pkl'):
         """
         Write the data of all models into a pickle file
         @param filename string, forwarded to save_data() of stath0
         """
         handler = self.stath0
         handler.save_data(filename)


     def set_data(self, data):
         """
         Attach externally provided data to both hierarchy handlers
         @param data IMP.pmi.output.Data
         """
         # both handlers reference the very same data object
         for handler in (self.stath0, self.stath1):
             handler.data = data


     def load_data(self, filename='data.pkl'):
         """
         Read the data of both handlers from a pickled file
         @param filename string
         """
         for handler in (self.stath0, self.stath1):
             handler.load_data(filename)
         # best_models now reflects the number of loaded frames
         self.best_models = len(self.stath0)


     def add_cluster(self, rmf_name_list):
         """
         Create a new cluster holding the frames of additional files
         @param rmf_name_list list of file names; each one is passed to
                add_stat_file() of both hierarchy handlers
         """
         new_id = len(self.clusters)
         new_cluster = IMP.pmi.output.Cluster(new_id)
         print("creating cluster index "+str(new_id))
         self.clusters.append(new_cluster)
         # frames with an index from here on come from the added files
         first_new_frame = len(self.stath0)

         for rmf in rmf_name_list:
             print("adding rmf "+rmf)
             for handler in (self.stath0, self.stath1):
                 handler.add_stat_file(rmf)

         for frame in range(first_new_frame, len(self.stath0)):
             new_cluster.add_member(frame, self.stath0[frame])
         self.update_clusters()


     def save_clusters(self, filename='clusters.pkl'):
         """
         Save the clusters into a pickle file
         @param filename string
         """
         import pickle
         # the context manager flushes and closes the file, also on error
         with open(filename, 'wb') as fl:
             pickle.dump(self.clusters, fl)


     def load_clusters(self, filename='clusters.pkl', append=False):
         """
         Load the clusters from a pickle file
         @param filename string
         @param append bool (Default=False), if True, append the clusters
                to the ones currently present; otherwise the current
                clusters are discarded and replaced
         """
         import pickle
         # NOTE: unpickling can execute arbitrary code; only load files that
         # come from a trusted source.
         # Read first, so the current clusters survive a failed load.
         with open(filename, 'rb') as fl:
             loaded = pickle.load(fl)
         if append:
             # the existing clusters must be kept for append to take effect
             self.clusters += loaded
         else:
             self.clean_clusters()
             self.clusters = loaded
         self.update_clusters()


     ####################

     # Analysis Functions

     ####################


     def compute_cluster_center(self, cluster):
         """
         Compute the cluster center for a given cluster
         The center is the member with the smallest summed rmsd to all
         the other members; its index is stored in cluster.center_index.
         A cluster with a single member has that member as its center.
         @param cluster object exposing `members` and `center_index`
                (e.g. IMP.pmi.output.Cluster)
         Raises IndexError if the cluster has no members.
         """
         member_distance = defaultdict(float)

         for n0, n1 in itertools.combinations(cluster.members, 2):
             # load the two members, one in each handler, before rmsd()
             _ = self.stath0[n0]
             _ = self.stath1[n1]
             rmsd, _ = self.rmsd()
             # each unordered pair is visited once, so the distance has to
             # be credited to both members; otherwise later members would
             # accumulate fewer terms and look artificially central
             member_distance[n0] += rmsd
             member_distance[n1] += rmsd

         if member_distance:
             cluster.center_index = min(member_distance,
                                        key=member_distance.get)
         else:
             # fewer than two members: no pair was evaluated
             cluster.center_index = cluster.members[0]


     def save_coordinates(self, cluster, rmf_name=None, reference="Absolute",
                          prefix="./"):
         """
         Write all members of a cluster as frames of a single rmf file
         @param cluster cluster whose members are written
         @param rmf_name name of the output file; when None the name is
                built from prefix and the cluster id
         @param reference forwarded to set_reference() when alignment
                is enabled
         @param prefix directory used for the default file name
         """
         print("saving coordinates", cluster)
         if self.alignment:
             self.set_reference(reference, cluster)
         writer = IMP.pmi.output.Output()
         if rmf_name is None:
             rmf_name = prefix+'/'+str(cluster.cluster_id)+".rmf3"

         # load the first member before the rmf file is initialised
         first_member = cluster.members[0]
         _ = self.stath1[first_member]
         self.model.update()
         writer.init_rmf(rmf_name, [self.stath1])
         for member in cluster.members:
             _ = self.stath1[member]
             self.model.update()
             # the molecular assignments are applied only while the frame
             # is written, and reverted right after
             self.apply_molecular_assignments(member)
             if self.alignment:
                 self.align()
             writer.write_rmf(rmf_name)
             self.undo_apply_molecular_assignments(member)
         writer.close_rmf(rmf_name)


     def prune_redundant_structures(self, rmsd_cutoff=10):
         """
         Build a new cluster of mutually dissimilar structures
         Model 0 is always kept. The candidates are the models with
         index 1, 11, 21, ...; a candidate is dropped when its rmsd to
         the current representative is within the cutoff. The first
         remaining candidate becomes the next representative, and so on.
         The kept models are appended as a new cluster.
         @param rmsd_cutoff rmsd at or below which a model is pruned
         """
         print("pruning models")
         representative = 0
         kept = [representative]
         candidates = list(range(1, len(self.stath1), 10))

         while candidates:
             _ = self.stath0[representative]
             survivors = []
             for idx in candidates:
                 _ = self.stath1[idx]
                 if self.alignment:
                     self.align()
                 dist, _ = self.rmsd()
                 if dist <= rmsd_cutoff:
                     print("pruning model %s, similar to model %s, rmsd %s"
                           % (str(idx), str(representative), str(dist)))
                 else:
                     survivors.append(idx)
             if not survivors:
                 break
             # promote the first survivor, compare the others against it
             representative, candidates = survivors[0], survivors[1:]
             kept.append(representative)

         pruned_cluster = IMP.pmi.output.Cluster(len(self.clusters))
         self.clusters.append(pruned_cluster)
         for index in kept:
             pruned_cluster.add_member(index, self.stath0[index])
         self.update_clusters()


     def precision(self, cluster):
         """
         Compute the precision of a cluster
         The precision is the average rmsd between the reference members
         and all other members. The reference is the cluster center when
         one is set, otherwise every member is used as reference in turn.
         The value is also stored in cluster.precision.
         @param cluster object exposing `members`, `center_index` and
                `precision`
         @return the precision, or None if no pair could be formed
         """
         if cluster.center_index is None:
             references = cluster.members
         else:
             references = [cluster.center_index]

         pair_count = 0
         rmsd_sum = 0.0
         for ref in references:
             _ = self.stath0[ref]
             for other in cluster.members:
                 if other == ref:
                     # a member is never compared with itself
                     continue
                 pair_count += 1
                 _ = self.stath1[other]
                 self.apply_molecular_assignments(other)
                 pair_rmsd, _ = self.rmsd()
                 rmsd_sum += pair_rmsd
                 self.undo_apply_molecular_assignments(other)

         result = rmsd_sum/pair_count if pair_count > 0 else None
         cluster.precision = result
         return result


     def bipartite_precision(self, cluster1, cluster2, verbose=False):
         """
         Compute the bipartite precision (ie the cross-precision)
         between two clusters
         @param cluster1 first cluster; its members are loaded in stath0
         @param cluster2 second cluster; its members are loaded in stath1
         @param verbose if True, print the rmsd of every pair
         @return the average rmsd over all pairs made of one member of
                 each cluster, or None if either cluster is empty
         """
         npairs = 0
         rmsd = 0.0
         for cn0, n0 in enumerate(cluster1.members):
             _ = self.stath0[n0]
             for cn1, n1 in enumerate(cluster2.members):
                 _ = self.stath1[n1]
                 tmp_rmsd, _ = self.rmsd()
                 if verbose:
                     print("--- rmsd between structure %s and structure "
                           "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
                 rmsd += tmp_rmsd
                 npairs += 1
         if npairs == 0:
             # no pair to average over (same convention as precision())
             return None
         return rmsd/npairs


     def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
              cluster_ref=None, step=1):
         """
         Compute the Root mean square fluctuations
         of a molecule in a cluster
         Returns an IMP.pmi.tools.OrderedDict() where the keys are the
         residue indexes and the value is the rmsf

         @param cluster cluster whose members are compared to the
                reference(s)
         @param molecule molecule name passed to IMP.atom.Selection
         @param copy_index copy of the molecule to select
         @param state_index state to select
         @param cluster_ref optional cluster providing the reference
                model(s); when None, `cluster` itself is used
         @param step stride used when iterating over the cluster members

         The value stored per residue is the sum of the distances between
         matching particles divided by the number of compared model
         pairs. As a side effect the value is also written as
         IMP.pmi.Uncertainty on the selected particles of both handlers.
         """
         rmsf = IMP.pmi.tools.OrderedDict()

         # assumes that residue indexes are identical for stath0 and stath1
         # reference models: the center of the reference cluster if it is
         # set, otherwise all of its members
         if cluster_ref is not None:
             if cluster_ref.center_index is not None:
                 members0 = [cluster_ref.center_index]
             else:
                 members0 = cluster_ref.members
         else:
             if cluster.center_index is not None:
                 members0 = [cluster.center_index]
             else:
                 members0 = cluster.members

         s0 = IMP.atom.Selection(self.stath0, molecule=molecule, resolution=1,
                                 copy_index=copy_index, state_index=state_index)
         ps0 = s0.get_selected_particles()
         # get the residue indexes
         # (first residue index of every selected particle, duplicates
         # removed while keeping the order)
         residue_indexes = list(IMP.pmi.tools.OrderedSet(
             [IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))

         # get the corresponding particles
         npairs = 0
         for n0 in members0:
             # load the reference model in stath0
             d0 = self.stath0[n0]
             for n1 in cluster.members[::step]:
                 if n0 != n1:
                     print("--- rmsf %s %s" % (str(n0), str(n1)))
                     # NOTE(review): the assignments are applied before
                     # frame n1 is loaded in stath1 below, whereas
                     # precision() loads the frame first - confirm that
                     # this order is intended
                     self.apply_molecular_assignments(n1)

                     s1 = IMP.atom.Selection(
                         self.stath1, molecule=molecule,
                         residue_indexes=residue_indexes, resolution=1,
                         copy_index=copy_index, state_index=state_index)
                     ps1 = s1.get_selected_particles()

                     d1 = self.stath1[n1]
                     if self.alignment:
                         self.align()
                     # particles are paired by position; the n-th pair is
                     # attributed to the n-th residue index
                     for n, (p0, p1) in enumerate(zip(ps0, ps1)):
                         r = residue_indexes[n]
                         # d0 and d1 are reused here as XYZ decorators
                         d0 = IMP.core.XYZ(p0)
                         d1 = IMP.core.XYZ(p1)
                         if r in rmsf:
                             rmsf[r] += IMP.core.get_distance(d0, d1)
                         else:
                             rmsf[r] = IMP.core.get_distance(d0, d1)
                     npairs += 1
                     self.undo_apply_molecular_assignments(n1)
         # rmsf is empty when no pair was compared, so npairs is never
         # zero inside this loop
         for r in rmsf:
             rmsf[r] /= npairs

             # store the value as uncertainty on the particles of both
             # handlers; for symmetric molecules the selection is not
             # restricted to a single copy
             for stath in [self.stath0, self.stath1]:
                 if molecule not in self.symmetric_molecules:
                     s = IMP.atom.Selection(
                         stath, molecule=molecule, residue_index=r,
                         resolution=1, copy_index=copy_index,
                         state_index=state_index)
                 else:
                     s = IMP.atom.Selection(
                         stath, molecule=molecule, residue_index=r,
                         resolution=1, state_index=state_index)

                 ps = s.get_selected_particles()
                 for p in ps:
                     if IMP.pmi.Uncertainty.get_is_setup(p):
                         IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
                     else:
                         IMP.pmi.Uncertainty.setup_particle(p, rmsf[r])

         return rmsf


     def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
                        reference="Absolute", prefix="./", step=1):
         """
         Accumulate the densities of the cluster members and write them
         as mrc files
         @param cluster cluster whose members contribute to the density
         @param density_custom_ranges passed to
                IMP.pmi.analysis.GetModelDensity
         @param voxel_size voxel size passed to GetModelDensity
         @param reference forwarded to set_reference() when alignment
                is enabled
         @param prefix output directory
         @param step stride used when iterating over the cluster members
         """
         if self.alignment:
             self.set_reference(reference, cluster)
         density = IMP.pmi.analysis.GetModelDensity(
             density_custom_ranges, voxel=voxel_size)

         for member in cluster.members[::step]:
             print("density %s" % str(member))
             _ = self.stath1[member]
             # the molecular assignments are applied only while the frame
             # contributes to the density, and reverted right after
             self.apply_molecular_assignments(member)
             if self.alignment:
                 self.align()
             density.add_subunits_density(self.stath1)
             self.undo_apply_molecular_assignments(member)
         density.write_mrc(path=prefix+'/', suffix=str(cluster.cluster_id))
         del density


     def contact_map(self, cluster, contact_threshold=15, log_scale=False,
                     consolidate=False, molecules=None, prefix='./',
                     reference="Absolute"):
         """
         Compute the residue-residue contact map of a cluster, plot it and
         save the plot as "<prefix>/contact_map.<cluster_id>.pdf".

         For every member of the cluster the resolution=1 coordinates of
         each residue are collected; two residues count as a contact when
         their distance d satisfies 1.0 <= d <= contact_threshold. The
         per-model contact matrices are summed over the cluster.

         @param cluster the cluster whose members are analysed
         @param contact_threshold largest distance that still counts as a
                contact
         @param log_scale if True the summed counts c are stored as
                -log(1 - c/(N+1)), with N = len(cluster); otherwise as
                the frequency c/N
         @param consolidate if True, all molecules sharing a name are
                merged into the block of the copy with copy index 0 (a
                pair is in contact if any copy pair is in contact)
         @param molecules optional value for the `molecules` keyword of
                IMP.atom.Selection; if None all molecules found under
                self.stath1 are used
         @param prefix directory where the PDF is written
         @param reference forwarded to set_reference() when alignment is on
         @return a dict mapping (molecule_x, molecule_y) to an array of
                 1-based (residue_x, residue_y) pairs whose stored
                 contact value is exactly 1.0
         """
         if self.alignment:
             self.set_reference(reference, cluster)
         # plotting dependencies are only needed by this method
         import matplotlib.pyplot as plt
         import matplotlib.cm as cm
         from scipy.spatial.distance import cdist
         import IMP.pmi.topology

         if molecules is None:
             particles = IMP.atom.get_leaves(self.stath1)
         else:
             particles = IMP.atom.Selection(
                 self.stath1,
                 molecules=molecules).get_selected_particles()
         mols = [IMP.pmi.topology.PMIMoleculeHierarchy(mol)
                 for mol in IMP.pmi.tools.get_molecules(particles)]
         unique_copies = [mol for mol in mols if mol.get_copy_index() == 0]
         mol_names_unique = {mol.get_name(): mol for mol in unique_copies}
         total_len_unique = sum(max(mol.get_residue_indexes())
                                for mol in unique_copies)

         # rows/columns occupied by each molecule in the contact matrix
         index_dict = {}
         prev_stop = 0
         for mol in (unique_copies if consolidate else mols):
             seqlen = max(mol.get_residue_indexes())
             index_dict[mol] = range(prev_stop, prev_stop + seqlen)
             prev_stop += seqlen

         contact_freqs = None
         for ncl, n1 in enumerate(cluster.members):
             print(ncl)
             _ = self.stath1[n1]
             coord_dict = IMP.pmi.tools.OrderedDict()
             for mol in mols:
                 rindexes = mol.get_residue_indexes()
                 # residues without a particle keep the placeholder
                 # (1, 1, 1); pairs of placeholders are at distance 0 and
                 # are rejected by the ">= 1.0" test below
                 mol_coords = np.ones((max(rindexes), 3))
                 for rnum in rindexes:
                     sel = IMP.atom.Selection(mol, residue_index=rnum,
                                              resolution=1)
                     selpart = sel.get_selected_particles()
                     if len(selpart) == 0:
                         continue
                     mol_coords[rnum - 1, :] = \
                         IMP.core.XYZ(selpart[0]).get_coordinates()
                 coord_dict[mol] = mol_coords

             if not consolidate:
                 all_coords = np.concatenate(list(coord_dict.values()))
                 dists = cdist(all_coords, all_coords)
                 binary_dists = np.where((dists <= contact_threshold)
                                         & (dists >= 1.0), 1.0, 0.0)
             else:
                 binary_dists_dict = {}
                 for mol1 in mols:
                     name1 = mol1.get_name()
                     len1 = max(mol1.get_residue_indexes())
                     for mol2 in mols:
                         name2 = mol2.get_name()
                         len2 = max(mol2.get_residue_indexes())
                         dists = cdist(coord_dict[mol1], coord_dict[mol2])
                         if (name1, name2) not in binary_dists_dict:
                             # the accumulator must have the shape of the
                             # mol1 x mol2 distance matrix
                             binary_dists_dict[(name1, name2)] = \
                                 np.zeros((len1, len2))
                         binary_dists_dict[(name1, name2)] += \
                             np.where((dists <= contact_threshold)
                                      & (dists >= 1.0), 1.0, 0.0)
                 binary_dists = np.zeros((total_len_unique, total_len_unique))

                 for name1, name2 in binary_dists_dict:
                     r1 = index_dict[mol_names_unique[name1]]
                     r2 = index_dict[mol_names_unique[name2]]
                     binary_dists[r1.start:r1.stop, r2.start:r2.stop] = \
                         np.where((binary_dists_dict[(name1, name2)] >= 1.0),
                                  1.0, 0.0)

             if contact_freqs is None:
                 contact_freqs = binary_dists
             else:
                 contact_freqs += binary_dists

         if contact_freqs is None:
             raise ValueError(
                 "cannot compute a contact map for an empty cluster")

         if log_scale:
             contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
         else:
             contact_freqs = 1.0/len(cluster)*contact_freqs

         fig = plt.figure(figsize=(100, 100))
         ax = fig.add_subplot(111)
         ax.set_xticks([])
         ax.set_yticks([])
         gap_between_components = 50
         colormap = cm.Blues
         colornorm = None

         if not consolidate:
             sorted_tuple = sorted(
                 (IMP.pmi.topology.PMIMoleculeHierarchy(
                     mol).get_extended_name(), mol) for mol in mols)
         else:
             sorted_tuple = sorted(
                 (IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(), mol)
                 for mol in unique_copies)
         prot_list = list(zip(*sorted_tuple))[1]

         # set the list of proteins on the x axis
         prot_listx = prot_list
         nresx = gap_between_components + \
             sum(max(mol.get_residue_indexes()) + gap_between_components
                 for mol in prot_listx)

         # set the list of proteins on the y axis
         prot_listy = prot_list
         nresy = gap_between_components + \
             sum(max(mol.get_residue_indexes()) + gap_between_components
                 for mol in prot_listy)

         # first and one-past-last plot coordinate of each protein
         resoffsetx = {}
         resendx = {}
         res = gap_between_components
         for mol in prot_listx:
             resoffsetx[mol] = res
             res += max(mol.get_residue_indexes())
             resendx[mol] = res
             res += gap_between_components

         resoffsety = {}
         resendy = {}
         res = gap_between_components
         for mol in prot_listy:
             resoffsety[mol] = res
             res += max(mol.get_residue_indexes())
             resendy[mol] = res
             res += gap_between_components

         # plot protein boundaries
         xticks = []
         xlabels = []
         for prot in prot_listx:
             res = resoffsetx[prot]
             end = resendx[prot]
             for proty in prot_listy:
                 resy = resoffsety[proty]
                 endy = resendy[proty]
                 ax.plot([res, res], [resy, endy], linestyle='-',
                         color='gray', lw=0.4)
                 ax.plot([end, end], [resy, endy], linestyle='-',
                         color='gray', lw=0.4)
             xticks.append((float(res) + float(end)) / 2)
             xlabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(
                 prot).get_extended_name())

         yticks = []
         ylabels = []
         for prot in prot_listy:
             res = resoffsety[prot]
             end = resendy[prot]
             for protx in prot_listx:
                 resx = resoffsetx[protx]
                 endx = resendx[protx]
                 ax.plot([resx, endx], [res, res], linestyle='-',
                         color='gray', lw=0.4)
                 ax.plot([resx, endx], [end, end], linestyle='-',
                         color='gray', lw=0.4)
             yticks.append((float(res) + float(end)) / 2)
             ylabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(
                 prot).get_extended_name())

         # plot the contact map
         tmp_array = np.zeros((nresx, nresy))
         ret = {}
         for px in prot_listx:
             rows = index_dict[px]
             for py in prot_listy:
                 cols = index_dict[py]
                 # copy the full block of the pair, last residue included
                 pair_block = contact_freqs[rows.start:rows.stop,
                                            cols.start:cols.stop]
                 tmp_array[resoffsetx[px]:resendx[px],
                           resoffsety[py]:resendy[py]] = pair_block
                 ret[(px, py)] = np.argwhere(pair_block == 1.0) + 1

         ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
                   origin='lower', alpha=0.6, interpolation='nearest')

         ax.set_xticks(xticks)
         ax.set_xticklabels(xlabels, rotation=90)
         ax.set_yticks(yticks)
         ax.set_yticklabels(ylabels)
         plt.setp(ax.get_xticklabels(), fontsize=6)
         plt.setp(ax.get_yticklabels(), fontsize=6)

         # display and write to file
         fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
         for spine in ax.spines.values():
             spine.set_linewidth(2.0)

         # transparent must be a real bool: the string "False" is truthy
         fig.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf",
                     dpi=300, transparent=False)
         # release the figure so repeated calls do not accumulate figures
         plt.close(fig)
         return ret


     def plot_rmsd_matrix(self, filename):
         """
         Plot the matrix of pairwise RMSDs together with a dendrogram and
         save the figure.

         @param filename name of the output file handed to savefig
         """
         self.compute_all_pairwise_rmsd()
         matrix_shape = (len(self.stath0), len(self.stath1))
         distance_matrix = np.zeros(matrix_shape)
         for (row, col), value in self.pairwise_rmsd.items():
             distance_matrix[row, col] = value

         import matplotlib as mpl
         mpl.use('Agg')
         import matplotlib.pylab as pl
         from scipy.cluster import hierarchy as hrc

         fig = pl.figure(figsize=(10, 8))

         # lower panel: dendrogram
         ax = fig.add_subplot(212)
         # NOTE(review): linkage() is given the square matrix as is; scipy
         # documents a condensed distance matrix or observation vectors as
         # input - confirm this is the intended usage
         linkage_matrix = hrc.linkage(distance_matrix)
         dendrogram = hrc.dendrogram(
             linkage_matrix,
             color_threshold=7,
             no_labels=True)
         leaves_order = dendrogram['leaves']
         ax.set_xlabel('Model')
         ax.set_ylabel('RMSD [Angstroms]')

         # upper-left panel: the matrix, reordered like the dendrogram leaves
         ax2 = fig.add_subplot(221)
         reordered = distance_matrix[leaves_order, :]
         reordered = reordered[:, leaves_order]
         cax = ax2.imshow(reordered, interpolation='nearest')
         cb = fig.colorbar(cax)
         cb.set_label('RMSD [Angstroms]')
         ax2.set_xlabel('Model')
         ax2.set_ylabel('Model')

         pl.savefig(filename, dpi=300)
         pl.close(fig)


     ####################

     # Internal Functions

     ####################


     def update_clusters(self):
         """
         Renumber the clusters: each cluster gets its position in
         self.clusters as cluster_id
         """
         for position, cluster in enumerate(self.clusters):
             cluster.cluster_id = position


     def get_molecule(self, hier, name, copy):
         """
         Return the first molecule reported by IMP.pmi.tools.get_molecules
         for the first particle selected by name and copy index

         @param hier hierarchy the selection is made on
         @param name value of the `molecule` keyword of the selection
         @param copy value of the `copy_index` keyword of the selection
         """
         selection = IMP.atom.Selection(hier, molecule=name, copy_index=copy)
         first_particle = selection.get_selected_particles()[0]
         molecules = IMP.pmi.tools.get_molecules(first_particle)
         return molecules[0]


     def update_seldicts(self):
         """
         Rebuild seldict0/seldict1 from the rmsd selections and refresh
         the per-selection symmetry flags
         """
         build_dictionary = IMP.pmi.tools.get_selections_dictionary
         self.seldict0 = build_dictionary(
             self.sel0_rmsd.get_selected_particles())
         self.seldict1 = build_dictionary(
             self.sel1_rmsd.get_selected_particles())

         # every selection starts out flagged as non-symmetric ...
         for selections in self.seldict0.values():
             for selection in selections:
                 self.issymmetricsel[selection] = False

         # ... then the ones of the symmetric molecules are flagged, and
         # the number of selections of each such molecule is recorded
         for molname in self.symmetric_molecules:
             selections = self.seldict0[molname]
             self.symmetric_molecules[molname] = len(selections)
             for selection in selections:
                 self.issymmetricsel[selection] = True


     def align(self):
         """
         Compute the transformation aligning sel1_alignment onto
         sel0_alignment and apply it to every element of self.rbs1 and
         self.beads1, then update the model.

         Beads that cannot be transformed are skipped (best effort).
         """
         tr = IMP.atom.get_transformation_aligning_first_to_second(
             self.sel1_alignment, self.sel0_alignment)

         for rb in self.rbs1:
             IMP.core.transform(rb, tr)

         for bead in self.beads1:
             try:
                 IMP.core.transform(IMP.core.XYZ(bead), tr)
             except Exception:
                 # keep skipping beads that fail, but do not swallow
                 # KeyboardInterrupt/SystemExit as a bare except would
                 continue

         self.model.update()


     def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
         '''
         Create one new cluster seeded with a model popped from idxs and
         add to it every remaining model closer than rmsd_cutoff to the
         seed.

         @param idxs set of model indexes still to be clustered; the
                models put into the new cluster are removed from it
         @param rmsd_cutoff a model joins the cluster when its rmsd to
                the seed is strictly below this value
         @param metric forwarded to rmsd()
         '''
         seed_index = idxs.pop()
         print("clustering model %s" % (seed_index,))
         seed_data = self.stath0[seed_index]

         cluster_index = len(self.clusters)
         new_cluster = IMP.pmi.output.Cluster(cluster_index)
         print("creating cluster index %s" % (cluster_index,))
         self.clusters.append(new_cluster)
         new_cluster.add_member(seed_index, seed_data)

         clustered = {seed_index}
         for candidate in idxs:
             print("--- trying to add model %s to cluster %s"
                   % (candidate, len(self.clusters)))
             candidate_data = self.stath1[candidate]
             if self.alignment:
                 self.align()
             distance, _ = self.rmsd(metric=metric)
             if distance < rmsd_cutoff:
                 print("--- model %s added, rmsd=%s" % (candidate, distance))
                 new_cluster.add_member(candidate, candidate_data)
                 clustered.add(candidate)
             else:
                 print("--- model %s NOT added, rmsd=%s"
                       % (candidate, distance))

         # in-place removal, so that the caller's set shrinks
         idxs.difference_update(clustered)


     def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
         """
         merge the clusters that have close members

         @param rmsd_cutoff cutoff distance in Angstroms
         @param metric Function to calculate distance between two Selections
                (by default, IMP.atom.get_rmsd is used)
         """
         # before merging, clusters are spheres of radius rmsd_cutoff
         # centered on the 1st element
         # here we only try to merge clusters whose centers are closer
         # than 2*rmsd_cutoff
         to_merge = []
         print("merging...")
         for c0, c1 in itertools.combinations(self.clusters, 2):
             # only clusters with more than one member are merge targets
             if len(c0.members) <= 1:
                 continue
             n0 = c0.members[0]
             n1 = c1.members[0]
             _ = self.stath0[n0]
             _ = self.stath1[n1]
             # use the caller's metric for the center distance as well, so
             # that it agrees with have_close_members() and no value
             # computed with a different metric gets memoized by rmsd()
             rmsd, _ = self.rmsd(metric=metric)
             if (rmsd < 2*rmsd_cutoff and
                     self.have_close_members(c0, c1, rmsd_cutoff, metric)):
                 to_merge.append((c0, c1))

         for c0, c in reversed(to_merge):
             self.merge(c0, c)

         # keep only full clusters
         self.clusters = [c for c in self.clusters if len(c.members) > 0]


     def have_close_members(self, c0, c1, rmsd_cutoff, metric):
         '''
         Tell whether any member of c0 (its first member excluded) is
         closer than rmsd_cutoff to any member of c1.

         @param c0 first cluster; c0.members[0] is not tested
         @param c1 second cluster; all its members are tested
         @param rmsd_cutoff distances strictly below this value are "close"
         @param metric forwarded to rmsd()
         @return True as soon as one close pair is found, False otherwise
         '''
         print("check close members for clusters " + str(c0.cluster_id) +
               " and " + str(c1.cluster_id))
         candidates0 = c0.members[1:]
         for member0 in candidates0:
             for member1 in c1.members:
                 _ = self.stath0[member0]
                 _ = self.stath1[member1]
                 distance, _ = self.rmsd(metric=metric)
                 if distance < rmsd_cutoff:
                     return True

         return False


     def merge(self, c0, c1):
         '''
         Add c1 to c0 with the in-place addition operator, then empty c1
         (its members become [] and its data {})
         '''
         c0 += c1
         c1.members, c1.data = [], {}


     def rmsd_helper(self, sels0, sels1, metric):
         '''
         Find the ordering of sels0 that gives the smallest sum of squared
         metric values against sels1.

         When the first selection of sels0 is flagged in issymmetricsel
         only the cyclic rotations of sels0 are tried, and each one is
         scored from its first pair alone (squared metric times the number
         of selections); otherwise all permutations are tried.

         @param sels0 selections to be reordered
         @param sels1 selections they are compared against, in fixed order
         @param metric function of two selections returning a distance
         @return (best ordering of sels0, its score); the ordering is None
                 when no candidate scored below infinity
         '''
         best_rmsd2 = float('inf')
         best_sel = None

         if self.issymmetricsel[sels0[0]]:
             # this case happens when symmetries were defined
             n_copies = len(sels0)
             target = sels1[0]
             for offset in range(n_copies):
                 distance = metric(sels0[offset], target)
                 score = distance*distance*n_copies
                 if score < best_rmsd2:
                     best_rmsd2 = score
                     best_sel = [sels0[(offset+k) % n_copies]
                                 for k in range(n_copies)]
             return best_sel, best_rmsd2

         for candidate in itertools.permutations(sels0):
             score = 0.0
             for sel0, sel1 in zip(candidate, sels1):
                 # stop summing once this candidate can no longer win
                 if not score < best_rmsd2:
                     break
                 distance = metric(sel0, sel1)
                 score += distance*distance
             if score < best_rmsd2:
                 best_rmsd2 = score
                 best_sel = candidate
         return best_sel, best_rmsd2


     def compute_all_pairwise_rmsd(self):
         """
         Call rmsd() once for every pair obtained by iterating stath0 in
         the outer loop and stath1 in the inner loop
         """
         # the loops must stay nested: the iteration itself is what moves
         # the two handlers from one model to the next (presumably), and
         # rmsd() reads their current indexes
         for _model0 in self.stath0:
             for _model1 in self.stath1:
                 self.rmsd()


     def rmsd(self, metric=IMP.atom.get_rmsd):
         '''
         Compute the RMSD between the current models of stath0 and stath1,
         resolving the ambiguous assignment of molecule copies.

         Results are memoized per pair of current indexes (in both
         orders); a memoized pair is returned without recomputation.

         @param metric function of two selections, forwarded to
                rmsd_helper()
         @return (rmsd, molecular_assignment) where molecular_assignment
                 maps (molecule name, copy index in stath0) to
                 (molecule name, copy index in stath1)
         '''
         n0 = self.stath0.current_index
         n1 = self.stath1.current_index
         pair = (n0, n1)

         # memoized: neither the rmsd nor the assignment is recomputed
         if pair in self.pairwise_rmsd \
                 and pair in self.pairwise_molecular_assignment:
             return (self.pairwise_rmsd[pair],
                     self.pairwise_molecular_assignment[pair])

         if self.alignment:
             self.align()

         weighted_rmsd2 = 0.0
         n_terms = 0
         # keys and values are (molecule name, copy index) tuples
         molecular_assignment = {}
         for molname, sels0 in self.seldict0.items():
             # all the selections of stath1 that share this molecule name
             sels1 = self.seldict1[molname]
             ordered_sels0, best_rmsd2 = \
                 self.rmsd_helper(sels0, sels1, metric)

             n_coords = len(ordered_sels0[0].get_selected_particles())
             weighted_rmsd2 += n_coords*best_rmsd2
             n_terms += n_coords*len(sels1)

             for sel0, sel1 in zip(ordered_sels0, sels1):
                 particle0 = sel0.get_selected_particles()[0]
                 particle1 = sel1.get_selected_particles()[0]
                 molecule0 = IMP.pmi.tools.get_molecules([particle0])[0]
                 molecule1 = IMP.pmi.tools.get_molecules([particle1])[0]
                 copy0 = IMP.atom.Copy(molecule0).get_copy_index()
                 copy1 = IMP.atom.Copy(molecule1).get_copy_index()
                 molecular_assignment[(molname, copy0)] = (molname, copy1)

         total_rmsd = math.sqrt(weighted_rmsd2/n_terms)

         # store the result under both orderings of the pair
         for key in (pair, (n1, n0)):
             self.pairwise_rmsd[key] = total_rmsd
             self.pairwise_molecular_assignment[key] = molecular_assignment
         return total_rmsd, molecular_assignment


     def set_reference(self, reference, cluster):
         """
         Fix the reference structure for structural alignment, rmsd and
         chain assignment

         @param reference can be either "Absolute" (model 0 of stath0) or
                "Relative" (the center of the given cluster, or its first
                member when no center index is set); any other value
                leaves the reference unchanged
         @param cluster the reference IMP.pmi.output.Cluster object
         """
         if reference == "Absolute":
             _ = self.stath0[0]
         elif reference == "Relative":
             # compare against None: 0 is a valid center index and must
             # not be mistaken for "no center"
             if cluster.center_index is not None:
                 n0 = cluster.center_index
             else:
                 n0 = cluster.members[0]
             _ = self.stath0[n0]


     def apply_molecular_assignments(self, n1):
         """
         Apply the molecular assignment computed by rmsd() for model n1:
         each assigned molecule of stath1 gets the copy index of the
         stath0 molecule it is matched with.

         @param n1 index of the model looked up in stath1
         """
         _ = self.stath1[n1]
         _, assignment = self.rmsd()
         for reference_key, target_key in assignment.items():
             reference_name, reference_copy = reference_key
             target_name, target_copy = target_key
             reference_mol = self.molcopydict0[reference_name][reference_copy]
             target_mol = self.molcopydict1[target_name][target_copy]
             copy_index_key = IMP.atom.Copy(reference_mol).get_copy_index_key()
             target_particle = IMP.atom.Copy(target_mol).get_particle()
             target_particle.set_value(copy_index_key, reference_copy)


     def undo_apply_molecular_assignments(self, n1):
         """
         Undo the Copy index assignment: each assigned molecule of stath1
         gets back the copy index recorded for it in the assignment.

         @param n1 index of the model looked up in stath1
         """
         _ = self.stath1[n1]
         _, assignment = self.rmsd()
         for reference_key, target_key in assignment.items():
             reference_name, reference_copy = reference_key
             target_name, target_copy = target_key
             reference_mol = self.molcopydict0[reference_name][reference_copy]
             target_mol = self.molcopydict1[target_name][target_copy]
             copy_index_key = IMP.atom.Copy(reference_mol).get_copy_index_key()
             target_particle = IMP.atom.Copy(target_mol).get_particle()
             target_particle.set_value(copy_index_key, target_copy)


     ####################

     # Container Functions

     ####################


     def __repr__(self):
         """Return a short summary: number of clusters and of models"""
         summary_lines = (
             "AnalysisReplicaExchange",
             "---- number of clusters %s " % str(len(self.clusters)),
             "---- number of models %s " % str(len(self.stath0)),
         )
         return "\n".join(summary_lines) + "\n"


     def __getitem__(self, int_slice_adaptor):
         """
         Return one cluster for an int key, or an iterator over the
         selected clusters for a slice key

         @param int_slice_adaptor an int or a slice; anything else raises
                TypeError
         """
         if isinstance(int_slice_adaptor, slice):
             return self.__iter__(int_slice_adaptor)
         if isinstance(int_slice_adaptor, int):
             return self.clusters[int_slice_adaptor]
         raise TypeError("Unknown Type")


     def __len__(self):
         """Return the number of clusters"""
         n_clusters = len(self.clusters)
         return n_clusters


     def __iter__(self, slice_key=None):
         """
         Yield the clusters one by one

         @param slice_key optional slice restricting (and ordering) the
                cluster indexes that are visited; None visits them all
         """
         indexes = range(len(self))
         if slice_key is not None:
             indexes = indexes[slice_key]
         for index in indexes:
             yield self[index]

IMP::pmi.dof.DegreesOfFreedom
Simplify creation of constraints and movers for an IMP Hierarchy.
Definition: pmi/dof/__init__.py:39

IMP::pmi.macros.AnalysisReplicaExchange.rmsd
def rmsd
Computes the RMSD.
Definition: macros.py:2494

IMP::pmi.macros.AnalysisReplicaExchange.set_reference
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
Definition: macros.py:2542

IMP::pmi.macros.AnalysisReplicaExchange.load_clusters
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1823

IMP::pmi.tools.select_at_all_resolutions
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1062

IMP::pmi.macros.AnalysisReplicaExchange.precision
def precision
Compute the precision of a cluster.
Definition: macros.py:1921

IMP::pmi.topology.PMIMoleculeHierarchy
Extends the functionality of IMP.atom.Molecule.
Definition: pmi/topology/__init__.py:1833

IMP::pmi.macros.AnalysisReplicaExchange0
A macro for running all the basic operations of analysis.
Definition: macros.py:939

IMP::pmi.tools.get_restraint_set
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:104

IMP::pmi.output.Cluster
A container for models organized into clusters.
Definition: output.py:1506

IMP::pmi.samplers.MolecularDynamics
Sample using molecular dynamics.
Definition: samplers.py:326

IMP::pmi.macros.AnalysisReplicaExchange.aggregate
def aggregate
initial filling of the clusters.
Definition: macros.py:2377

IMP::core::RigidBodyMember
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:634

IMP::pmi.macros.ReplicaExchange
A macro to help setup and run replica exchange.
Definition: macros.py:65

IMP::core::RigidBodyMember::get_is_setup
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:635

IMP::pmi.topology
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
Definition: pmi/topology/__init__.py:1

IMP::pmi.macros.AnalysisReplicaExchange.prune_redundant_structures
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1886

IMP::pmi.macros.AnalysisReplicaExchange.rmsf
def rmsf
Compute the root mean square fluctuations of a molecule in a cluster. Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1970

IMP::core::XYZR::setup_particle
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48

IMP::pmi.io
Utility classes and functions for reading and storing PMI files.
Definition: pmi/io/__init__.py:1

IMP::pmi.io.get_best_models
def get_best_models
Given a list of stat files, read them all and find the best models.
Definition: pmi/io/__init__.py:293

IMP::pmi.tools.get_molecules
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1159

IMP::pmi.io.TotalScoreOutput
A helper output for model evaluation.
Definition: pmi/io/__init__.py:546

IMP::pmi.tools
Miscellaneous utilities.
Definition: tools.py:1

IMP::pmi.macros.AnalysisReplicaExchange.set_rmsd_selection
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1683

IMP::pmi.macros.AnalysisReplicaExchange.get_cluster_data
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1764

IMP::pmi.macros.AnalysisReplicaExchange.__init__
def __init__
Construction of the Class.
Definition: macros.py:1636

IMP::pmi.macros.BuildSystem.get_molecules
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:843

IMP::pmi.macros.AnalysisReplicaExchange.set_data
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1781

IMP::pmi.macros.AnalysisReplicaExchange.undo_apply_molecular_assignments
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2575

IMP::pmi.macros.AnalysisReplicaExchange.set_alignment_selection
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1699

IMP::pmi.macros.AnalysisReplicaExchange.rmsd_helper
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2458

IMP::pmi.macros.AnalysisReplicaExchange.save_coordinates
def save_coordinates
Save the coordinates of the current cluster to a single RMF file.
Definition: macros.py:1861

IMP::pmi.macros.AnalysisReplicaExchange0.clustering
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1082

IMP::pmi.macros.AnalysisReplicaExchange.apply_molecular_assignments
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
Definition: macros.py:2561

IMP::pmi.macros.AnalysisReplicaExchange
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1625

IMP::pmi::Uncertainty
Add uncertainty to a particle.
Definition: Uncertainty.h:24

IMP::pmi.macros.BuildSystem
A macro to build an IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:630

IMP::pmi.macros.AnalysisReplicaExchange.merge_aggregates
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2404

IMP::pmi.topology.System
Represent the root node of the global IMP.atom.Hierarchy.
Definition: pmi/topology/__init__.py:152

IMP::core::get_distance
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89

IMP::pmi.analysis.Clustering
A class to cluster structures.
Definition: pmi/Analysis.py:183

IMP::pmi.macros.AnalysisReplicaExchange0.add_protocol_output
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:991

IMP::pmi.macros.AnalysisReplicaExchange0.__init__
def __init__
Constructor.
Definition: macros.py:951

IMP::pmi::Uncertainty::setup_particle
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45

IMP::pmi.macros.AnalysisReplicaExchange.compute_cluster_center
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1843

IMP::core::XYZR::get_is_setup
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47

IMP::pmi.macros.AnalysisReplicaExchange0.get_modeling_trajectory
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:1002

IMP::pmi.StructureWarning
Warning related to handling of structures.
Definition: pmi/__init__.py:590

IMP::atom::Copy
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28

IMP::atom::Hierarchy::setup_particle
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
Definition: atom/Hierarchy.h:240

IMP::pmi.io.get_trajectory_models
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
Definition: pmi/io/__init__.py:390

IMP::pmi.macros.ReplicaExchange.__init__
def __init__
Constructor.
Definition: macros.py:137

IMP::atom::Hierarchy
The standard decorator for manipulating molecular structures.
Definition: atom/Hierarchy.h:192

IMP::pmi.analysis.Alignment
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:21

IMP::pmi.macros.AnalysisReplicaExchange.update_seldicts
def update_seldicts
Update the seldicts.
Definition: macros.py:2346

IMP::pmi.macros.AnalysisReplicaExchange.update_clusters
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2335

IMP::pmi.tools.scatter_and_gather
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:542

IMP::core::transform
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.

IMP::pmi.macros.AnalysisReplicaExchange.refine
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1728

IMP::core::XYZ
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30

IMP::pmi.output.Output
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:199

IMP::pmi.tools.Stopwatch
Collect timing information.
Definition: tools.py:119

IMP::pmi.macros.AnalysisReplicaExchange.set_symmetric
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1692

IMP::pmi.MissingFileWarning
Warning for an expected, but missing, file.
Definition: pmi/__init__.py:600

IMP::pmi.analysis
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1

IMP::pmi.io.get_bead_sizes
def get_bead_sizes
Definition: pmi/io/__init__.py:513

IMP::algebra::get_identity_transformation_3d
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Definition: Transformation3D.h:134

IMP::pmi.output
Classes for writing output files and processing them.
Definition: output.py:1

IMP.deprecated_object
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9590

IMP::pmi.samplers
Sampling of the system.
Definition: samplers.py:1

IMP::pmi.samplers.MonteCarlo
Sample using Monte Carlo.
Definition: samplers.py:46

IMP::pmi.dof
Create movers and set up constraints for PMI objects.
Definition: pmi/dof/__init__.py:1

IMP::pmi.macros.AnalysisReplicaExchange.merge
def merge
merge two clusters
Definition: macros.py:2450

IMP::pmi.macros.BuildSystem.add_state
def add_state
Add a state using the topology info in an IMP::pmi::topology::TopologyReader object.
Definition: macros.py:683

IMP::Exception
The general base class for IMP exceptions.
Definition: exception.h:48

IMP::core::SampleProvenance::setup_particle
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266

IMP::pmi.output.StatHierarchyHandler
Class to link stat files to several RMF files.
Definition: output.py:1253

IMP::pmi.alphabets
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1

IMP::pmi.macros.AnalysisReplicaExchange.save_data
def save_data
Save the data for all of the models into a pickle file.
Definition: macros.py:1774

IMP::Particle
Class to handle individual particles of a Model object.
Definition: Particle.h:43

IMP::pmi.macros.BuildSystem.execute_macro
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:854

IMP::pmi.macros.AnalysisReplicaExchange.bipartite_precision
def bipartite_precision
Compute the bipartite precision (i.e., the cross-precision) between two clusters.
Definition: macros.py:1950

IMP::pmi.io.read_coordinates_of_rmfs
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
Definition: pmi/io/__init__.py:434

IMP::pmi.macros.BuildSystem.__init__
def __init__
Constructor.
Definition: macros.py:659

IMP::atom::get_copy_index
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.

IMP::pmi.macros.AnalysisReplicaExchange.cluster
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1715

IMP::pmi::Uncertainty::get_is_setup
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30

IMP::pmi.macros.AnalysisReplicaExchange.save_clusters
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1814

IMP::pmi.macros.AnalysisReplicaExchange.have_close_members
def have_close_members
Returns true if c0 and c1 have members that are closer than rmsd_cutoff.
Definition: macros.py:2435

IMP::rmf::add_geometries
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.

IMP::atom::get_transformation_aligning_first_to_second
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
Definition: bayesianem/utilities.h:95

IMP::pmi.topology.Sequences
A dictionary-like wrapper for reading and storing sequence data.
Definition: pmi/topology/__init__.py:1011

IMP::pmi.tools.get_rbs_and_beads
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1135

IMP::core::add_provenance
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.

IMP::atom::get_leaves
Hierarchies get_leaves(const Selection &h)

IMP::atom::Selection
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70

IMP::pmi.analysis.GetModelDensity
Compute mean density maps from structures.
Definition: pmi/Analysis.py:1154

IMP::pmi.macros.AnalysisReplicaExchange.load_data
def load_data
Load the data from an external pickled file.
Definition: macros.py:1789

IMP::rmf
Support for the RMF file format for storing hierarchical molecular data and markup.

IMP::pmi.tools.get_residue_indexes
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:499

IMP::pmi.samplers.ReplicaExchange
Sample using replica exchange.
Definition: samplers.py:431

IMP::pmi.ParameterWarning
Warning for probably incorrect input parameters.
Definition: pmi/__init__.py:595

IMP::pmi.io.add_provenance
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Definition: pmi/io/__init__.py:281

IMP::isd
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...

IMP::core::XYZR
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27