3 """@namespace IMP.pmi1.analysis 
    4    Tools for clustering and cluster analysis 
    6 from __future__ 
import print_function
 
   16 from operator 
import itemgetter
 
   17 from copy 
import deepcopy
 
   18 from math 
import log,sqrt
 
   24     """Performs alignment and RMSD calculation for two sets of coordinates 
   26     The class also takes into account non-equal stoichiometry of the proteins. 
   27     If this is the case, the protein names of proteins in multiple copies 
   28     should be specified in the following form: 
   29     nameA..1, nameA..2 (note two dots). 
   32     def __init__(self, template, query, weights=None):
 
   34            @param query {'p1':coords(L,3), 'p2':coords(L,3)} 
   35            @param template {'p1':coords(L,3), 'p2':coords(L,3)} 
   36            @param weights optional weights for each set of coordinates 
   39         self.template = template
 
   42         if len(self.query.keys()) != len(self.template.keys()):
 
   43             raise ValueError(
'''the number of proteins 
   44                                in template and query does not match!''')
 
   50         self.proteins = sorted(self.query.keys())
 
   51         prots_uniq = [i.split(
'..')[0] 
for i 
in self.proteins]
 
   59             np = prots_uniq.count(p)
 
   60             copies = [i 
for i 
in self.proteins 
if i.split(
'..')[0] == p]
 
   61             prmts = list(itertools.permutations(copies, len(copies)))
 
   64         self.Product = list(itertools.product(*P.values()))
 
   72         torder = sum([list(i) 
for i 
in self.Product[0]], [])
 
   75             if self.weights 
is not None:
 
   76                 weights += [i 
for i 
in self.weights[t]]
 
   79         self.rmsd = 10000000000.
 
   80         for comb 
in self.Product:
 
   82             order = sum([list(i) 
for i 
in comb], [])
 
   92             if self.weights 
is not None:
 
  103         from scipy.spatial.distance 
import cdist
 
  111         torder = sum([list(i) 
for i 
in self.Product[0]], [])
 
  115         self.rmsd, Transformation = 10000000000., 
'' 
  118         for comb 
in self.Product:
 
  119             order = sum([list(i) 
for i 
in comb], [])
 
  125             if len(template_xyz) != len(query_xyz):
 
  126                 raise ValueError(
'''the number of coordinates 
  127                                in template and query does not match!''')
 
  132             query_xyz_tr = [transformation.get_transformed(n)
 
  136                 sum(np.diagonal(cdist(template_xyz, query_xyz_tr) ** 2)) / len(template_xyz))
 
  139                 Transformation = transformation
 
  142         return (self.rmsd, Transformation)
 
  147 Proteins = {'a..1':np.array([np.array([-1.,1.])]), 
  148             'a..2':np.array([np.array([1.,1.,])]), 
  149             'a..3':np.array([np.array([-2.,1.])]), 
  150             'b':np.array([np.array([0.,-1.])]), 
  151             'c..1':np.array([np.array([-1.,-1.])]), 
  152             'c..2':np.array([np.array([1.,-1.])]), 
  153             'd':np.array([np.array([0.,0.])]), 
  154             'e':np.array([np.array([0.,1.])])} 
  156 Ali = Alignment(Proteins, Proteins) 
  158 if Ali.get_rmsd() == 0.0: print 'successful test!' 
  159 else: print 'ERROR!'; exit() 
  164 class Violations(object):
 
  168         self.violation_thresholds = {}
 
  169         self.violation_counts = {}
 
  171         data = open(filename)
 
  176             d = d.strip().split()
 
  177             self.violation_thresholds[d[0]] = float(d[1])
 
  179     def get_number_violated_restraints(self, rsts_dict):
 
  181         for rst 
in self.violation_thresholds:
 
  182             if rst 
not in rsts_dict:
 
  184             if float(rsts_dict[rst]) > self.violation_thresholds[rst]:
 
  186                 if rst 
not in self.violation_counts:
 
  187                     self.violation_counts[rst] = 1
 
  189                     self.violation_counts[rst] += 1
 
  195     """A class to cluster structures. 
  196     Uses scipy's cdist function to compute distance matrices 
  197     and sklearn's kmeans clustering module. 
  201            @param rmsd_weights Flat list of weights for each particle 
  205             from mpi4py 
import MPI
 
  206             self.comm = MPI.COMM_WORLD
 
  207             self.rank = self.comm.Get_rank()
 
  208             self.number_of_processes = self.comm.size
 
  210             self.number_of_processes = 1
 
  213         self.structure_cluster_ids = 
None 
  214         self.tmpl_coords = 
None 
  215         self.rmsd_weights=rmsd_weights
 
  217     def set_template(self, part_coords):
 
  219         self.tmpl_coords = part_coords
 
  222         """Add coordinates for a single model.""" 
  224         self.all_coords[frame] = Coords
 
  226     def dist_matrix(self):
 
  228         self.model_list_names = list(self.all_coords.keys())
 
  229         self.model_indexes = list(range(len(self.model_list_names)))
 
  230         self.model_indexes_dict = dict(
 
  231             list(zip(self.model_list_names, self.model_indexes)))
 
  232         model_indexes_unique_pairs = list(itertools.combinations(self.model_indexes, 2))
 
  234         my_model_indexes_unique_pairs = IMP.pmi1.tools.chunk_list_into_segments(
 
  235             model_indexes_unique_pairs,
 
  236             self.number_of_processes)[self.rank]
 
  238         print(
"process %s assigned with %s pairs" % (str(self.rank), str(len(my_model_indexes_unique_pairs))))
 
  240         (raw_distance_dict, self.transformation_distance_dict) = self.matrix_calculation(self.all_coords,
 
  242                                                                                          my_model_indexes_unique_pairs)
 
  244         if self.number_of_processes > 1:
 
  247             pickable_transformations = self.get_pickable_transformation_distance_dict(
 
  250                 pickable_transformations)
 
  251             self.set_transformation_distance_dict_from_pickable(
 
  252                 pickable_transformations)
 
  254         self.raw_distance_matrix = np.zeros(
 
  255             (len(self.model_list_names), len(self.model_list_names)))
 
  256         for item 
in raw_distance_dict:
 
  258             self.raw_distance_matrix[f1, f2] = raw_distance_dict[item]
 
  259             self.raw_distance_matrix[f2, f1] = raw_distance_dict[item]
 
  261     def get_dist_matrix(self):
 
  262         return self.raw_distance_matrix
 
  265         """Run K-means clustering 
  266         @param number_of_clusters Num means 
  267         @param seed the random seed 
  269         from sklearn.cluster 
import KMeans
 
  274             kmeans = KMeans(n_clusters=number_of_clusters)
 
  277             kmeans = KMeans(k=number_of_clusters)
 
  278         kmeans.fit_predict(self.raw_distance_matrix)
 
  280         self.structure_cluster_ids = kmeans.labels_
 
  282     def get_pickable_transformation_distance_dict(self):
 
  283         pickable_transformations = {}
 
  284         for label 
in self.transformation_distance_dict:
 
  285             tr = self.transformation_distance_dict[label]
 
  286             trans = tuple(tr.get_translation())
 
  287             rot = tuple(tr.get_rotation().get_quaternion())
 
  288             pickable_transformations[label] = (rot, trans)
 
  289         return pickable_transformations
 
  291     def set_transformation_distance_dict_from_pickable(
 
  293             pickable_transformations):
 
  294         self.transformation_distance_dict = {}
 
  295         for label 
in pickable_transformations:
 
  296             tr = pickable_transformations[label]
 
  299             self.transformation_distance_dict[
 
  302     def save_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
 
  304         outf = open(file_name + 
".data", 
'wb')
 
  310         pickable_transformations = self.get_pickable_transformation_distance_dict(
 
  313             (self.structure_cluster_ids,
 
  314              self.model_list_names,
 
  315              pickable_transformations),
 
  318         np.save(file_name + 
".npy", self.raw_distance_matrix)
 
  320     def load_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
 
  323         inputf = open(file_name + 
".data", 
'rb')
 
  324         (self.structure_cluster_ids, self.model_list_names,
 
  325          pickable_transformations) = pickle.load(inputf)
 
  328         self.raw_distance_matrix = np.load(file_name + 
".npy")
 
  330         self.set_transformation_distance_dict_from_pickable(
 
  331             pickable_transformations)
 
  332         self.model_indexes = list(range(len(self.model_list_names)))
 
  333         self.model_indexes_dict = dict(
 
  334             list(zip(self.model_list_names, self.model_indexes)))
 
  336     def plot_matrix(self, figurename="clustermatrix.pdf"):
 
  337         import matplotlib 
as mpl
 
  339         import matplotlib.pylab 
as pl
 
  340         from scipy.cluster 
import hierarchy 
as hrc
 
  342         fig = pl.figure(figsize=(10,8))
 
  343         ax = fig.add_subplot(212)
 
  344         dendrogram = hrc.dendrogram(
 
  345             hrc.linkage(self.raw_distance_matrix),
 
  348         leaves_order = dendrogram[
'leaves']
 
  349         ax.set_xlabel(
'Model')
 
  350         ax.set_ylabel(
'RMSD [Angstroms]')
 
  352         ax2 = fig.add_subplot(221)
 
  354             self.raw_distance_matrix[leaves_order,
 
  357             interpolation=
'nearest')
 
  358         cb = fig.colorbar(cax)
 
  359         cb.set_label(
'RMSD [Angstroms]')
 
  360         ax2.set_xlabel(
'Model')
 
  361         ax2.set_ylabel(
'Model')
 
  363         pl.savefig(figurename, dpi=300)
 
  366     def get_model_index_from_name(self, name):
 
  367         return self.model_indexes_dict[name]
 
  369     def get_cluster_labels(self):
 
  371         return list(set(self.structure_cluster_ids))
 
  373     def get_number_of_clusters(self):
 
  374         return len(self.get_cluster_labels())
 
  376     def get_cluster_label_indexes(self, label):
 
  378             [i 
for i, l 
in enumerate(self.structure_cluster_ids) 
if l == label]
 
  381     def get_cluster_label_names(self, label):
 
  383             [self.model_list_names[i]
 
  384                 for i 
in self.get_cluster_label_indexes(label)]
 
  387     def get_cluster_label_average_rmsd(self, label):
 
  389         indexes = self.get_cluster_label_indexes(label)
 
  392             sub_distance_matrix = self.raw_distance_matrix[
 
  393                 indexes, :][:, indexes]
 
  394             average_rmsd = np.sum(sub_distance_matrix) / \
 
  395                 (len(sub_distance_matrix)
 
  396                  ** 2 - len(sub_distance_matrix))
 
  401     def get_cluster_label_size(self, label):
 
  402         return len(self.get_cluster_label_indexes(label))
 
  404     def get_transformation_to_first_member(
 
  408         reference = self.get_cluster_label_indexes(cluster_label)[0]
 
  409         return self.transformation_distance_dict[(reference, structure_index)]
 
  411     def matrix_calculation(self, all_coords, template_coords, list_of_pairs):
 
  413         model_list_names = list(all_coords.keys())
 
  414         rmsd_protein_names = list(all_coords[model_list_names[0]].keys())
 
  415         raw_distance_dict = {}
 
  416         transformation_distance_dict = {}
 
  417         if template_coords 
is None:
 
  421             alignment_template_protein_names = list(template_coords.keys())
 
  423         for (f1, f2) 
in list_of_pairs:
 
  431                 coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
 
  432                                  for pr 
in rmsd_protein_names])
 
  434                 for pr 
in rmsd_protein_names:
 
  435                     coords_f2[pr] = all_coords[model_list_names[f2]][pr]
 
  437                 Ali = 
Alignment(coords_f1, coords_f2, self.rmsd_weights)
 
  438                 rmsd = Ali.get_rmsd()
 
  444                 coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
 
  445                                  for pr 
in alignment_template_protein_names])
 
  446                 coords_f2 = dict([(pr, all_coords[model_list_names[f2]][pr])
 
  447                                  for pr 
in alignment_template_protein_names])
 
  450                 template_rmsd, transformation = Ali.align()
 
  455                 coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
 
  456                                  for pr 
in rmsd_protein_names])
 
  458                 for pr 
in rmsd_protein_names:
 
  459                     coords_f2[pr] = [transformation.get_transformed(
 
  460                         i) 
for i 
in all_coords[model_list_names[f2]][pr]]
 
  462                 Ali = 
Alignment(coords_f1, coords_f2, self.rmsd_weights)
 
  463                 rmsd = Ali.get_rmsd()
 
  465             raw_distance_dict[(f1, f2)] = rmsd
 
  466             raw_distance_dict[(f2, f1)] = rmsd
 
  467             transformation_distance_dict[(f1, f2)] = transformation
 
  468             transformation_distance_dict[(f2, f1)] = transformation
 
  470         return raw_distance_dict, transformation_distance_dict
 
  474     """Compute the RMSD (without alignment) taking into account the copy ambiguity. 
  475     To be used with pmi2 hierarchies. Can be used for instance as follows: 
  477     rmsd=IMP.pmi1.analysis.RMSD(hier,hier,[mol.get_name() for mol in mols],dynamic0=True,dynamic1=False) 
  478     output_objects.append(rmsd) 
  480     before shuffling the coordinates 
  483     def __init__(self,hier0,hier1,molnames,label="None",dynamic0=True,dynamic1=True,metric=IMP.algebra.get_rmsd):
 
  485         @param hier0 first input hierarchy 
  486         @param hier1 second input hierarchy 
  487         @param molname the names of the molecules used for the RMSD 
  488         @dynamic0 if True stores the decorators XYZ and coordinates of hier0 can be update. If false coordinates are static (stored in Vector3Ds) 
  489         and will never be updated 
  490         @dynamic1 same as above 
  491         metric what metric should be used 
  495         self.dynamic0=dynamic0
 
  496         self.dynamic1=dynamic1
 
  501         """return data structure for the RMSD calculation""" 
  507             if name 
not in molnames:
 
  514                 sel=
IMP.atom.Selection(mol,residue_index=i,representation_type=IMP.atom.BALLS,resolution=1)
 
  515                 parts=sel.get_selected_particles()
 
  517                     mol_coords[mol].append(
IMP.core.XYZ(parts[0]).get_coordinates())
 
  521                 moldict[name].append(mol)
 
  524         return moldict, mol_coords, mol_XYZs
 
  526     def get_rmsd_and_assigments(self):
 
  532         for molname, ref_mols 
in self.moldict1.items():
 
  534             for ref_mol 
in ref_mols:
 
  536                     coord1=[XYZ.get_coordinates() 
for XYZ 
in self.mol_XYZs1[ref_mol]]
 
  538                     coord1=self.molcoords1[ref_mol]
 
  543             for rmf_mols 
in itertools.permutations(self.moldict0[molname]):
 
  545                 for rmf_mol 
in rmf_mols:
 
  547                         coord0=[XYZ.get_coordinates() 
for XYZ 
in self.mol_XYZs0[rmf_mol]]
 
  549                         coord0=self.molcoords0[rmf_mol]
 
  553                 rmf_mols_list.append(rmf_mols)
 
  555             rmf_mols_best_order=rmf_mols_list[rmsd.index(m)]
 
  558             for n, (ref_mol,rmf_mol) 
in enumerate(zip(ref_mols,rmf_mols_best_order)):
 
  559                 best_assignments.append((rmf_mol,ref_mol))
 
  561                     coord0=[XYZ.get_coordinates() 
for XYZ 
in self.mol_XYZs0[rmf_mol]]
 
  563                     coord0=self.molcoords0[rmf_mol]
 
  566                     coord1=[XYZ.get_coordinates() 
for XYZ 
in self.mol_XYZs1[ref_mol]]
 
  568                     coord1=self.molcoords1[ref_mol]
 
  569                 rmsd_pair=self.metric(coord1, coord0)
 
  570                 N=len(self.molcoords1[ref_mol])
 
  572                 total_rmsd+=rmsd_pair*rmsd_pair*N
 
  573                 rmsd_dict[ref_mol]=rmsd_pair
 
  574         total_rmsd = sqrt(total_rmsd/total_N)
 
  575         return total_rmsd,best_assignments
 
  578         """Returns output for IMP.pmi1.output.Output object""" 
  579         total_rmsd,best_assignments=self.get_rmsd_and_assigments()
 
  582         for rmf_mol,ref_mol 
in best_assignments:
 
  583             ref_name=ref_mol.get_name()
 
  585             rmf_name=rmf_mol.get_name()
 
  587             assignments_out.append(rmf_name+
"."+str(rmf_copy)+
"->"+ref_name+
"."+str(ref_copy))
 
  588         return {
"RMSD_"+self.label:str(total_rmsd),
"RMSD_assignments_"+self.label:str(assignments_out)}
 
  594     """A class to evaluate the precision of an ensemble. 
  596     Also can evaluate the cross-precision of multiple ensembles. 
  597     Supports MPI for coordinate reading. 
  598     Recommended procedure: 
  599       -# initialize object and pass the selection for evaluating precision 
  600       -# call add_structures() to read in the data (specify group name) 
  601       -# call get_precision() to evaluate inter/intra precision 
  602       -# call get_rmsf() to evaluate within-group fluctuations 
  606                  selection_dictionary={}):
 
  608            @param model The IMP Model 
  609            @param resolution Use 1 or 10 (kluge: requires that "_Res:X" is 
  610                   part of the hier name) 
  611            @param selection_dictionary Dictionary where keys are names for 
  612                   selections and values are selection tuples for scoring 
  613                   precision. "All" is automatically made as well 
  616             from mpi4py 
import MPI
 
  617             self.comm = MPI.COMM_WORLD
 
  618             self.rank = self.comm.Get_rank()
 
  619             self.number_of_processes = self.comm.size
 
  621             self.number_of_processes = 1
 
  624         self.styles = [
'pairwise_rmsd',
'pairwise_drmsd_k',
'pairwise_drmsd_Q',
 
  625                        'pairwise_drms_k',
'pairwise_rmsd',
'drmsd_from_center']
 
  626         self.style = 
'pairwise_drmsd_k' 
  627         self.structures_dictionary = {}
 
  628         self.reference_structures_dictionary = {}
 
  630         self.protein_names = 
None 
  631         self.len_particles_resolution_one = 
None 
  633         self.rmf_names_frames = {}
 
  634         self.reference_rmf_names_frames = 
None 
  635         self.reference_structure = 
None 
  636         self.reference_prot = 
None 
  637         self.selection_dictionary = selection_dictionary
 
  638         self.threshold = 40.0
 
  639         self.residue_particle_index_map = 
None 
  641         if resolution 
in [1,10]:
 
  642             self.resolution = resolution
 
  644             raise KeyError(
"Currently only allow resolution 1 or 10")
 
  646     def _get_structure(self,rmf_frame_index,rmf_name):
 
  647         """Read an RMF file and return the particles""" 
  648         rh = RMF.open_rmf_file_read_only(rmf_name)
 
  650             print(
"getting coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
 
  654             print(
"linking coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
 
  659         if self.resolution==1:
 
  661         elif self.resolution==10:
 
  664         protein_names = list(particle_dict.keys())
 
  665         particles_resolution_one = []
 
  666         for k 
in particle_dict:
 
  667             particles_resolution_one += (particle_dict[k])
 
  669         if self.protein_names==
None:
 
  670             self.protein_names = protein_names
 
  672             if self.protein_names!=protein_names:
 
  673                 print(
"Error: the protein names of the new coordinate set is not compatible with the previous one")
 
  675         if self.len_particles_resolution_one==
None:
 
  676             self.len_particles_resolution_one = len(particles_resolution_one)
 
  678             if self.len_particles_resolution_one!=len(particles_resolution_one):
 
  679                 raise ValueError(
"the new coordinate set is not compatible with the previous one")
 
  681         return particles_resolution_one
 
  687                       setup_index_map=
False):
 
  688         """ Read a structure into the ensemble and store (as coordinates). 
  689         @param rmf_name The name of the RMF file 
  690         @param rmf_frame_index The frame to read 
  691         @param structure_set_name Name for the set that includes this structure 
  693         @param setup_index_map if requested, set up a dictionary to help 
  698         if structure_set_name 
in self.structures_dictionary:
 
  699             cdict = self.structures_dictionary[structure_set_name]
 
  700             rmflist = self.rmf_names_frames[structure_set_name]
 
  702             self.structures_dictionary[structure_set_name]={}
 
  703             self.rmf_names_frames[structure_set_name]=[]
 
  704             cdict = self.structures_dictionary[structure_set_name]
 
  705             rmflist = self.rmf_names_frames[structure_set_name]
 
  709             particles_resolution_one = self._get_structure(rmf_frame_index,rmf_name)
 
  711             print(
"something wrong with the rmf")
 
  713         self.selection_dictionary.update({
"All":self.protein_names})
 
  715         for selection_name 
in self.selection_dictionary:
 
  716             selection_tuple = self.selection_dictionary[selection_name]
 
  717             coords = self._select_coordinates(selection_tuple,particles_resolution_one,self.prots[0])
 
  718             if selection_name 
not in cdict:
 
  719                 cdict[selection_name] = [coords]
 
  721                 cdict[selection_name].append(coords)
 
  723         rmflist.append((rmf_name,rmf_frame_index))
 
  727             self.residue_particle_index_map = {}
 
  728             for prot_name 
in self.protein_names:
 
  729                 self.residue_particle_index_map[prot_name] = \
 
  730                        self._get_residue_particle_index_map(
 
  732                            particles_resolution_one,self.prots[0])
 
  735                        rmf_name_frame_tuples,
 
  737         """Read a list of RMFs, supports parallel 
  738         @param rmf_name_frame_tuples list of (rmf_file_name,frame_number) 
  739         @param structure_set_name Name this set of structures (e.g. "cluster.1") 
  743         my_rmf_name_frame_tuples=IMP.pmi1.tools.chunk_list_into_segments(
 
  744             rmf_name_frame_tuples,self.number_of_processes)[self.rank]
 
  745         for nfr,tup 
in enumerate(my_rmf_name_frame_tuples):
 
  747             rmf_frame_index=tup[1]
 
  749             if self.residue_particle_index_map 
is None:
 
  750                 setup_index_map = 
True 
  752                 setup_index_map = 
False 
  759         if self.number_of_processes > 1:
 
  762                 self.comm.send(self.structures_dictionary, dest=0, tag=11)
 
  764                 for i 
in range(1, self.number_of_processes):
 
  765                     data_tmp = self.comm.recv(source=i, tag=11)
 
  766                     for key 
in self.structures_dictionary:
 
  767                         self.structures_dictionary[key].update(data_tmp[key])
 
  768                 for i 
in range(1, self.number_of_processes):
 
  769                     self.comm.send(self.structures_dictionary, dest=i, tag=11)
 
  771                 self.structures_dictionary = self.comm.recv(source=0, tag=11)
 
  773     def _get_residue_particle_index_map(self,prot_name,structure,hier):
 
  775         residue_particle_index_map = []
 
  777         all_selected_particles = s.get_selected_particles()
 
  778         intersection = list(set(all_selected_particles) & set(structure))
 
  779         sorted_intersection = IMP.pmi1.tools.sort_by_residues(intersection)
 
  780         for p 
in sorted_intersection:
 
  782         return residue_particle_index_map
 
  785     def _select_coordinates(self,tuple_selections,structure,prot):
 
  786         selected_coordinates=[]
 
  787         for t 
in tuple_selections:
 
  788             if type(t)==tuple 
and len(t)==3:
 
  790                 all_selected_particles = s.get_selected_particles()
 
  791                 intersection = list(set(all_selected_particles) & set(structure))
 
  792                 sorted_intersection = IMP.pmi1.tools.sort_by_residues(intersection)
 
  793                 cc = [tuple(
IMP.core.XYZ(p).get_coordinates()) 
for p 
in sorted_intersection]
 
  794                 selected_coordinates += cc
 
  797                 all_selected_particles = s.get_selected_particles()
 
  798                 intersection = list(set(all_selected_particles) & set(structure))
 
  799                 sorted_intersection = IMP.pmi1.tools.sort_by_residues(intersection)
 
  800                 cc = [tuple(
IMP.core.XYZ(p).get_coordinates()) 
for p 
in sorted_intersection]
 
  801                 selected_coordinates += cc
 
  803                 raise ValueError(
"Selection error")
 
  804         return selected_coordinates
 
  806     def set_threshold(self,threshold):
 
  807         self.threshold = threshold
 
  809     def _get_distance(self,
 
  815         """ Compute distance between structures with various metrics """ 
  816         c1 = self.structures_dictionary[structure_set_name1][selection_name][index1]
 
  817         c2 = self.structures_dictionary[structure_set_name2][selection_name][index2]
 
  821         if self.style==
'pairwise_drmsd_k':
 
  823         if self.style==
'pairwise_drms_k':
 
  825         if self.style==
'pairwise_drmsd_Q':
 
  828         if self.style==
'pairwise_rmsd':
 
  832     def _get_particle_distances(self,structure_set_name1,structure_set_name2,
 
  833                                selection_name,index1,index2):
 
  834         c1 = self.structures_dictionary[structure_set_name1][selection_name][index1]
 
  835         c2 = self.structures_dictionary[structure_set_name2][selection_name][index2]
 
  840         distances=[np.linalg.norm(a-b) 
for (a,b) 
in zip(coordinates1,coordinates2)]
 
  849                       selection_keywords=
None):
 
  850         """ Evaluate the precision of two named structure groups. Supports MPI. 
  851         When the structure_set_name1 is different from the structure_set_name2, 
  852         this evaluates the cross-precision (average pairwise distances). 
  853         @param outfile Name of the precision output file 
  854         @param structure_set_name1  string name of the first structure set 
  855         @param structure_set_name2  string name of the second structure set 
  856         @param skip analyze every (skip) structure for the distance matrix calculation 
  857         @param selection_keywords Specify the selection name you want to calculate on. 
  858                By default this is computed for everything you provided in the constructor, 
  859                plus all the subunits together. 
  861         if selection_keywords 
is None:
 
  862             sel_keys = list(self.selection_dictionary.keys())
 
  864             for k 
in selection_keywords:
 
  865                 if k 
not in self.selection_dictionary:
 
  866                     raise KeyError(
"you are trying to find named selection " \
 
  867                         + k + 
" which was not requested in the constructor")
 
  868             sel_keys = selection_keywords
 
  870         if outfile 
is not None:
 
  871             of = open(outfile,
"w")
 
  873         for selection_name 
in sel_keys:
 
  874             number_of_structures_1 = len(self.structures_dictionary[structure_set_name1][selection_name])
 
  875             number_of_structures_2 = len(self.structures_dictionary[structure_set_name2][selection_name])
 
  877             structure_pointers_1 = list(range(0,number_of_structures_1,skip))
 
  878             structure_pointers_2 = list(range(0,number_of_structures_2,skip))
 
  879             pair_combination_list = list(itertools.product(structure_pointers_1,structure_pointers_2))
 
  880             if len(pair_combination_list)==0:
 
  881                 raise ValueError(
"no structure selected. Check the skip parameter.")
 
  884             my_pair_combination_list = IMP.pmi1.tools.chunk_list_into_segments(
 
  885                 pair_combination_list,self.number_of_processes)[self.rank]
 
  886             my_length = len(my_pair_combination_list)
 
  887             for n,pair 
in enumerate(my_pair_combination_list):
 
  888                 progression = int(float(n)/my_length*100.0)
 
  889                 distances[pair] = self._get_distance(structure_set_name1,structure_set_name2,
 
  890                                                      selection_name,pair[0],pair[1])
 
  891             if self.number_of_processes > 1:
 
  896                 if structure_set_name1==structure_set_name2:
 
  897                     structure_pointers = structure_pointers_1
 
  898                     number_of_structures = number_of_structures_1
 
  903                     distances_to_structure = {}
 
  904                     distances_to_structure_normalization = {}
 
  906                     for n 
in structure_pointers:
 
  907                         distances_to_structure[n] = 0.0
 
  908                         distances_to_structure_normalization[n]=0
 
  911                         distance += distances[k]
 
  912                         distances_to_structure[k[0]] += distances[k]
 
  913                         distances_to_structure[k[1]] += distances[k]
 
  914                         distances_to_structure_normalization[k[0]] += 1
 
  915                         distances_to_structure_normalization[k[1]] += 1
 
  917                     for n 
in structure_pointers:
 
  918                         distances_to_structure[n] = distances_to_structure[n]/distances_to_structure_normalization[n]
 
  920                     min_distance = min([distances_to_structure[n] 
for n 
in distances_to_structure])
 
  921                     centroid_index = [k 
for k, v 
in distances_to_structure.items() 
if v == min_distance][0]
 
  922                     centroid_rmf_name = self.rmf_names_frames[structure_set_name1][centroid_index]
 
  924                     centroid_distance = 0.0
 
  926                     for n 
in range(number_of_structures):
 
  927                         dist = self._get_distance(structure_set_name1,structure_set_name1,
 
  928                                                              selection_name,centroid_index,n)
 
  929                         centroid_distance += dist
 
  930                         distance_list.append(dist)
 
  933                     centroid_distance /= number_of_structures
 
  935                     if outfile 
is not None:
 
  936                         of.write(str(selection_name)+
" "+structure_set_name1+
 
  937                                         " average centroid distance "+str(centroid_distance)+
"\n")
 
  938                         of.write(str(selection_name)+
" "+structure_set_name1+
 
  939                                         " centroid index "+str(centroid_index)+
"\n")
 
  940                         of.write(str(selection_name)+
" "+structure_set_name1+
 
  941                                         " centroid rmf name "+str(centroid_rmf_name)+
"\n")
 
  942                         of.write(str(selection_name)+
" "+structure_set_name1+
 
  943                                         " median centroid distance  "+str(np.median(distance_list))+
"\n")
 
  945                 average_pairwise_distances=sum(distances.values())/len(list(distances.values()))
 
  946                 if outfile 
is not None:
 
  947                     of.write(str(selection_name)+
" "+structure_set_name1+
" "+structure_set_name2+
 
  948                              " average pairwise distance "+str(average_pairwise_distances)+
"\n")
 
  949         if outfile 
is not None:
 
  951         return centroid_index
 
  957                  set_plot_yaxis_range=
None):
 
  958         """ Calculate the residue mean square fluctuations (RMSF). 
  959         Automatically outputs as data file and pdf 
  960         @param structure_set_name Which structure set to calculate RMSF for 
  961         @param outdir Where to write the files 
  962         @param skip Skip this number of structures 
  963         @param set_plot_yaxis_range In case you need to change the plot 
  971             for sel_name 
in self.protein_names:
 
  972                 self.selection_dictionary.update({sel_name:[sel_name]})
 
  974                     number_of_structures = len(self.structures_dictionary[structure_set_name][sel_name])
 
  978                 rpim = self.residue_particle_index_map[sel_name]
 
  979                 outfile = outdir+
"/rmsf."+sel_name+
".dat" 
  980                 of = open(outfile,
"w")
 
  981                 residue_distances = {}
 
  983                 for index 
in range(number_of_structures):
 
  984                     distances = self._get_particle_distances(structure_set_name,
 
  987                                                              centroid_index,index)
 
  988                     for nblock,block 
in enumerate(rpim):
 
  989                         for residue_number 
in block:
 
  990                             residue_nblock[residue_number] = nblock
 
  991                             if residue_number 
not in residue_distances:
 
  992                                 residue_distances[residue_number] = [distances[nblock]]
 
  994                                 residue_distances[residue_number].append(distances[nblock])
 
  998                 for rn 
in residue_distances:
 
 1000                     rmsf = np.std(residue_distances[rn])
 
 1002                     of.write(str(rn)+
" "+str(residue_nblock[rn])+
" "+str(rmsf)+
"\n")
 
 1004                 IMP.pmi1.output.plot_xy_data(residues,rmsfs,title=sel_name,
 
 1005                                             out_fn=outdir+
"/rmsf."+sel_name,display=
False,
 
 1006                                             set_plot_yaxis_range=set_plot_yaxis_range,
 
 1007                                             xlabel=
'Residue Number',ylabel=
'Standard error')
 
 1012         """Read in a structure used for reference computation. 
 1013         Needed before calling get_average_distance_wrt_reference_structure() 
 1014         @param rmf_name The RMF file to read the reference 
 1015         @param rmf_frame_index The index in that file 
 1017         particles_resolution_one = self._get_structure(rmf_frame_index,rmf_name)
 
 1018         self.reference_rmf_names_frames = (rmf_name,rmf_frame_index)
 
 1020         for selection_name 
in self.selection_dictionary:
 
 1021             selection_tuple = self.selection_dictionary[selection_name]
 
 1022             coords = self._select_coordinates(selection_tuple,
 
 1023                                               particles_resolution_one,self.prots[0])
 
 1024             self.reference_structures_dictionary[selection_name] = coords
 
 1027         """First align then calculate RMSD 
 1028         @param structure_set_name: the name of the structure set 
 1029         @param alignment_selection: the key containing the selection tuples needed to make the alignment stored in self.selection_dictionary 
 1030         @return: for each structure in the structure set, returns the rmsd 
 1032         if self.reference_structures_dictionary=={}:
 
 1033             print(
"Cannot compute until you set a reference structure")
 
 1036         align_reference_coordinates = self.reference_structures_dictionary[alignment_selection_key]
 
 1037         align_coordinates = self.structures_dictionary[structure_set_name][alignment_selection_key]
 
 1038         transformations = []
 
 1039         for c 
in align_coordinates:
 
 1041             transformation = Ali.align()[1]
 
 1042             transformations.append(transformation)
 
 1043         for selection_name 
in self.selection_dictionary:
 
 1044             reference_coordinates = self.reference_structures_dictionary[selection_name]
 
 1047             for n,sc 
in enumerate(self.structures_dictionary[structure_set_name][selection_name]):
 
 1050                 distances.append(distance)
 
 1051             print(selection_name,
"average rmsd",sum(distances)/len(distances),
"median",self._get_median(distances),
"minimum distance",min(distances))
 
 1053     def _get_median(self,list_of_values):
 
 1054         return np.median(np.array(list_of_values))
 
 1057         """Compare the structure set to the reference structure. 
 1058         @param structure_set_name The structure set to compute this on 
 1059         @note First call set_reference_structure() 
 1062         if self.reference_structures_dictionary=={}:
 
 1063             print(
"Cannot compute until you set a reference structure")
 
 1065         for selection_name 
in self.selection_dictionary:
 
 1066             reference_coordinates = self.reference_structures_dictionary[selection_name]
 
 1069             for sc 
in self.structures_dictionary[structure_set_name][selection_name]:
 
 1071                 if self.style==
'pairwise_drmsd_k':
 
 1073                 if self.style==
'pairwise_drms_k':
 
 1075                 if self.style==
'pairwise_drmsd_Q':
 
 1077                 if self.style==
'pairwise_rmsd':
 
 1079                 distances.append(distance)
 
 1081             print(selection_name,
"average distance",sum(distances)/len(distances),
"minimum distance",min(distances),
'nframes',len(distances))
 
 1082             ret[selection_name] = {
'average_distance':sum(distances)/len(distances),
'minimum_distance':min(distances)}
 
 1085     def get_coordinates(self):
 
 1088     def set_precision_style(self, style):
 
 1089         if style 
in self.styles:
 
 1092             raise ValueError(
"No such style")
 
 1096     """Compute mean density maps from structures. 
 1098     Keeps a dictionary of density maps, 
 1099     keys are in the custom ranges. When you call add_subunits_density, it adds 
 1100     particle coordinates to the existing density maps. 
 1103     def __init__(self, custom_ranges, representation=None, resolution=20.0, voxel=5.0):
 
 1105            @param custom_ranges  Required. It's a dictionary, keys are the 
 1106                   density component names, values are selection tuples 
 1107                           e.g. {'kin28':[['kin28',1,-1]], 
 1108                                'density_name_1' :[('ccl1')], 
 1109                                'density_name_2' :[(1,142,'tfb3d1'), 
 1110                                                   (143,700,'tfb3d2')], 
 1111            @param representation PMI representation, for doing selections. 
 1112                           Not needed if you only pass hierarchies 
 1113            @param resolution The MRC resolution of the output map (in Angstrom unit) 
 1114            @param voxel The voxel size for the output map (lower is slower) 
 1117         self.representation = representation
 
 1118         self.MRCresolution = resolution
 
 1121         self.count_models = 0.0
 
 1122         self.custom_ranges = custom_ranges
 
 1125         """Add a frame to the densities. 
 1126         @param hierarchy Optionally read the hierarchy from somewhere. 
 1127                          If not passed, will just read the representation. 
 1129         self.count_models += 1.0
 
 1133             all_particles_by_resolution = []
 
 1134             for name 
in part_dict:
 
 1135                 all_particles_by_resolution += part_dict[name]
 
 1137         for density_name 
in self.custom_ranges:
 
 1140                 all_particles_by_segments = []
 
 1142             for seg 
in self.custom_ranges[density_name]:
 
 1145                     parts += IMP.tools.select_by_tuple(self.representation,
 
 1146                                                        seg, resolution=1, name_is_ambiguous=
False)
 
 1149                     for h 
in hierarchy.get_children():
 
 1153                     if type(seg) == str:
 
 1155                     elif type(seg) == tuple 
and len(seg) == 2:
 
 1157                             hierarchy, molecule=seg[0],copy_index=seg[1])
 
 1158                     elif type(seg) == tuple 
and len(seg) == 3:
 
 1160                             hierarchy, molecule=seg[2],residue_indexes=range(seg[0], seg[1] + 1))
 
 1161                     elif type(seg) == tuple 
and len(seg) == 4:
 
 1163                             hierarchy, molecule=seg[2],residue_indexes=range(seg[0], seg[1] + 1),copy_index=seg[3])
 
 1165                         raise Exception(
'could not understand selection tuple '+str(seg))
 
 1167                     all_particles_by_segments += s.get_selected_particles()
 
 1170                         set(all_particles_by_segments) & set(all_particles_by_resolution))
 
 1171             self._create_density_from_particles(parts, density_name)
 
 1173     def normalize_density(self):
 
 1176     def _create_density_from_particles(self, ps, name,
 
 1177                                       kernel_type=
'GAUSSIAN'):
 
 1178         '''Internal function for adding to densities. 
 1179         pass XYZR particles with mass and create a density from them. 
 1180         kernel type options are GAUSSIAN, BINARIZED_SPHERE, and SPHERE.''' 
 1182             'GAUSSIAN': IMP.em.GAUSSIAN,
 
 1183             'BINARIZED_SPHERE': IMP.em.BINARIZED_SPHERE,
 
 1184             'SPHERE': IMP.em.SPHERE}
 
 1188         dmap.set_was_used(
True)
 
 1189         if name 
not in self.densities:
 
 1190             self.densities[name] = dmap
 
 1196             dmap3.set_was_used(
True)
 
 1198             dmap3.add(self.densities[name])
 
 1199             self.densities[name] = dmap3
 
 1201     def get_density_keys(self):
 
 1202         return list(self.densities.keys())
 
 1205         """Get the current density for some component name""" 
 1206         if name 
not in self.densities:
 
 1209             return self.densities[name]
 
 1211     def write_mrc(self, path="./",suffix=None):
 
 1213         for density_name 
in self.densities:
 
 1214             self.densities[density_name].
multiply(1. / self.count_models)
 
 1216                 name=path + 
"/" + density_name + 
".mrc" 
 1218                 name=path + 
"/" + density_name + 
"." + suffix + 
".mrc" 
 1219             path, file = os.path.split(name)
 
 1220             if not os.path.exists(path):
 
 1223                 except OSError 
as e:
 
 1224                     if e.errno != errno.EEXIST:
 
 1227                 self.densities[density_name],name,
 
 1231 class GetContactMap(object):
 
 1234         self.distance = distance
 
 1235         self.contactmap = 
'' 
 1242     def set_prot(self, prot):
 
 1243         from scipy.spatial.distance 
import cdist
 
 1252         for name 
in particles_dictionary:
 
 1253             residue_indexes = []
 
 1254             for p 
in particles_dictionary[name]:
 
 1258             if len(residue_indexes) != 0:
 
 1259                 self.protnames.append(name)
 
 1261     def get_subunit_coords(self, frame, align=0):
 
 1262         from scipy.spatial.distance 
import cdist
 
 1266         test, testr = [], []
 
 1267         for part 
in self.prot.get_children():
 
 1270             for chl 
in part.get_children():
 
 1276                 SortedSegments.append((chl, startres))
 
 1277             SortedSegments = sorted(SortedSegments, key=itemgetter(1))
 
 1279             for sgmnt 
in SortedSegments:
 
 1282                     crd = np.array([p.get_x(), p.get_y(), p.get_z()])
 
 1285                     radii.append(p.get_radius())
 
 1287                     new_name = part.get_name() + 
'_' + sgmnt[0].get_name() +\
 
 1290                             .get_residue_indexes()[0])
 
 1291                     namelist.append(new_name)
 
 1292                     self.expanded[new_name] = len(
 
 1294                     if part.get_name() 
not in self.resmap:
 
 1295                         self.resmap[part.get_name()] = {}
 
 1297                         self.resmap[part.get_name()][res] = new_name
 
 1299         coords = np.array(coords)
 
 1300         radii = np.array(radii)
 
 1301         if len(self.namelist) == 0:
 
 1302             self.namelist = namelist
 
 1303             self.contactmap = np.zeros((len(coords), len(coords)))
 
 1304         distances = cdist(coords, coords)
 
 1305         distances = (distances - radii).T - radii
 
 1306         distances = distances <= self.distance
 
 1307         self.contactmap += distances
 
 1312             identification_string=
'ISDCrossLinkMS_Distance_'):
 
 1315         data = open(filname)
 
 1316         D = data.readlines()
 
 1320             if identification_string 
in d:
 
 1327                 t1, t2 = (d[0], d[1]), (d[1], d[0])
 
 1328                 if t1 
not in self.XL:
 
 1329                     self.XL[t1] = [(int(d[2]) + 1, int(d[3]) + 1)]
 
 1330                     self.XL[t2] = [(int(d[3]) + 1, int(d[2]) + 1)]
 
 1332                     self.XL[t1].append((int(d[2]) + 1, int(d[3]) + 1))
 
 1333                     self.XL[t2].append((int(d[3]) + 1, int(d[2]) + 1))
 
 1335     def dist_matrix(self, skip_cmap=0, skip_xl=1, outname=None):
 
 1339         L = sum(self.expanded.values())
 
 1340         proteins = self.protnames
 
 1345             proteins = [p.get_name() 
for p 
in self.prot.get_children()]
 
 1347             for p1 
in range(len(proteins)):
 
 1348                 for p2 
in range(p1, len(proteins)):
 
 1350                         self.resmap[proteins[p1]].keys()), max(self.resmap[proteins[p2]].keys())
 
 1351                     pn1, pn2 = proteins[p1], proteins[p2]
 
 1352                     mtr = np.zeros((pl1 + 1, pl2 + 1))
 
 1353                     print(
'Creating matrix for: ', p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
 
 1354                     for i1 
in range(1, pl1 + 1):
 
 1355                         for i2 
in range(1, pl2 + 1):
 
 1357                                 r1 = K.index(self.resmap[pn1][i1])
 
 1358                                 r2 = K.index(self.resmap[pn2][i2])
 
 1360                                 mtr[i1 - 1, i2 - 1] = r
 
 1362                                 missing.append((pn1, pn2, i1, i2))
 
 1364                     Matrices[(pn1, pn2)] = mtr
 
 1369                 raise ValueError(
"cross-links were not provided, use add_xlinks function!")
 
 1372             for p1 
in range(len(proteins)):
 
 1373                 for p2 
in range(p1, len(proteins)):
 
 1375                         self.resmap[proteins[p1]].keys()), max(self.resmap[proteins[p2]].keys())
 
 1376                     pn1, pn2 = proteins[p1], proteins[p2]
 
 1377                     mtr = np.zeros((pl1 + 1, pl2 + 1))
 
 1380                         xls = self.XL[(pn1, pn2)]
 
 1383                             xls = self.XL[(pn2, pn1)]
 
 1388                         print(
'Creating matrix for: ', p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
 
 1389                         for xl1, xl2 
in xls:
 
 1391                                 print(
'X' * 10, xl1, xl2)
 
 1394                                 print(
'X' * 10, xl1, xl2)
 
 1396                             mtr[xl1 - 1, xl2 - 1] = 100
 
 1398                         print(
'Creating matrix for: ', p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
 
 1399                         for xl1, xl2 
in xls:
 
 1401                                 print(
'X' * 10, xl1, xl2)
 
 1404                                 print(
'X' * 10, xl1, xl2)
 
 1406                             mtr[xl2 - 1, xl1 - 1] = 100
 
 1408                         print(
'No cross links between: ', pn1, pn2)
 
 1409                     Matrices_xl[(pn1, pn2)] = mtr
 
 1425         for i, c 
in enumerate(C):
 
 1426             cl = max(self.resmap[c].keys())
 
 1431                 R.append(R[-1] + cl)
 
 1436             import matplotlib 
as mpl
 
 1438         import matplotlib.pyplot 
as plt
 
 1439         import matplotlib.gridspec 
as gridspec
 
 1440         import scipy.sparse 
as sparse
 
 1443         gs = gridspec.GridSpec(len(W), len(W),
 
 1448         for x1, r1 
in enumerate(R):
 
 1453             for x2, r2 
in enumerate(R):
 
 1459                 ax = plt.subplot(gs[cnt])
 
 1462                         mtr = Matrices[(C[x1], C[x2])]
 
 1464                         mtr = Matrices[(C[x2], C[x1])].T
 
 1468                         interpolation=
'nearest',
 
 1470                         vmax=log(mtr.max()))
 
 1475                         mtr = Matrices_xl[(C[x1], C[x2])]
 
 1477                         mtr = Matrices_xl[(C[x2], C[x1])].T
 
 1479                         sparse.csr_matrix(mtr),
 
 1489                     ax.set_ylabel(C[x1], rotation=90)
 
 1491             plt.savefig(outname + 
".pdf", dpi=300, transparent=
"False")
 
 1499 def get_hiers_from_rmf(model, frame_number, rmf_file):
 
 1501     print(
"getting coordinates for frame %i rmf file %s" % (frame_number, rmf_file))
 
 1504     rh = RMF.open_rmf_file_read_only(rmf_file)
 
 1509         print(
"Unable to open rmf file %s" % (rmf_file))
 
 1516         print(
"Unable to open frame %i of file %s" % (frame_number, rmf_file))
 
 1523 def link_hiers_to_rmf(model,hiers,frame_number, rmf_file):
 
 1524     print(
"linking hierarchies for frame %i rmf file %s" % (frame_number, rmf_file))
 
 1525     rh = RMF.open_rmf_file_read_only(rmf_file)
 
 1530         print(
"Unable to open frame %i of file %s" % (frame_number, rmf_file))
 
 1537 def get_hiers_and_restraints_from_rmf(model, frame_number, rmf_file):
 
 1539     print(
"getting coordinates for frame %i rmf file %s" % (frame_number, rmf_file))
 
 1542     rh = RMF.open_rmf_file_read_only(rmf_file)
 
 1548         print(
"Unable to open rmf file %s" % (rmf_file))
 
 1553         print(
"Unable to open frame %i of file %s" % (frame_number, rmf_file))
 
 1560 def link_hiers_and_restraints_to_rmf(model,hiers,rs, frame_number, rmf_file):
 
 1561     print(
"linking hierarchies for frame %i rmf file %s" % (frame_number, rmf_file))
 
 1562     rh = RMF.open_rmf_file_read_only(rmf_file)
 
 1568         print(
"Unable to open frame %i of file %s" % (frame_number, rmf_file))
 
 1576     """Get particles at res 1, or any beads, based on the name. 
 1577     No Representation is needed. This is mainly used when the hierarchy 
 1578     is read from an RMF file. 
 1579     @return a dictionary of component names and their particles 
 1580     \note If the root node is named "System" or is a "State", do proper selection. 
 1585     for c 
in prot.get_children():
 
 1588         for s 
in c.get_children():
 
 1589             if "_Res:1" in s.get_name() 
and "_Res:10" not in s.get_name():
 
 1591             if "Beads" in s.get_name():
 
 1595     for name 
in particle_dict:
 
 1596         particle_dict[name] = IMP.pmi1.tools.sort_by_residues(
 
 1597             list(set(particle_dict[name]) & set(allparticles)))
 
 1598     return particle_dict
 
 1601     """Get particles at res 10, or any beads, based on the name. 
 1602     No Representation is needed. 
 1603     This is mainly used when the hierarchy is read from an RMF file. 
 1604     @return a dictionary of component names and their particles 
 1605     \note If the root node is named "System" or is a "State", do proper selection. 
 1609     for c 
in prot.get_children():
 
 1612         for s 
in c.get_children():
 
 1613             if "_Res:10" in s.get_name():
 
 1615             if "Beads" in s.get_name():
 
 1618     for name 
in particle_dict:
 
 1619         particle_dict[name] = IMP.pmi1.tools.sort_by_residues(
 
 1620             list(set(particle_dict[name]) & set(allparticles)))
 
 1621     return particle_dict
 
 1623 def select_by_tuple(first_res_last_res_name_tuple):
 
 1624     first_res = first_res_last_res_hier_tuple[0]
 
 1625     last_res = first_res_last_res_hier_tuple[1]
 
 1626     name = first_res_last_res_hier_tuple[2]
 
 1629     """Visualization of crosslinks""" 
 1631         self.crosslinks = []
 
 1632         self.external_csv_data = 
None 
 1633         self.crosslinkedprots = set()
 
 1634         self.mindist = +10000000.0
 
 1635         self.maxdist = -10000000.0
 
 1636         self.contactmap = 
None 
 1638     def set_hierarchy(self, prot):
 
 1639         self.prot_length_dict = {}
 
 1640         self.model=prot.get_model()
 
 1642         for i 
in prot.get_children():
 
 1644             residue_indexes = []
 
 1648             if len(residue_indexes) != 0:
 
 1649                 self.prot_length_dict[name] = max(residue_indexes)
 
 1651     def set_coordinates_for_contact_map(self, rmf_name,rmf_frame_index):
 
 1652         from scipy.spatial.distance 
import cdist
 
 1654         rh= RMF.open_rmf_file_read_only(rmf_name)
 
 1657         print(
"getting coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
 
 1668         self.index_dictionary = {}
 
 1670         for name 
in particles_dictionary:
 
 1671             residue_indexes = []
 
 1672             for p 
in particles_dictionary[name]:
 
 1677                 if len(residue_indexes) != 0:
 
 1679                     for res 
in range(min(residue_indexes), max(residue_indexes) + 1):
 
 1682                         crd = np.array([d.get_x(), d.get_y(), d.get_z()])
 
 1684                         radii.append(d.get_radius())
 
 1685                         if name 
not in self.index_dictionary:
 
 1686                             self.index_dictionary[name] = [resindex]
 
 1688                             self.index_dictionary[name].append(resindex)
 
 1691         coords = np.array(coords)
 
 1692         radii = np.array(radii)
 
 1694         distances = cdist(coords, coords)
 
 1695         distances = (distances - radii).T - radii
 
 1697         distances = np.where(distances <= 20.0, 1.0, 0)
 
 1698         if self.contactmap 
is None:
 
 1699             self.contactmap = np.zeros((len(coords), len(coords)))
 
 1700         self.contactmap += distances
 
 1702         for prot 
in prots: IMP.atom.destroy(prot)
 
 1705         self, data_file, search_label=
'ISDCrossLinkMS_Distance_',
 
 1708         filter_rmf_file_names=
None, 
 
 1709         external_csv_data_file=
None,
 
 1710         external_csv_data_file_unique_id_key=
"Unique ID"):
 
 1719         keys = po.get_keys()
 
 1721         xl_keys = [k 
for k 
in keys 
if search_label 
in k]
 
 1723         if filter_rmf_file_names 
is not None:
 
 1724             rmf_file_key=
"local_rmf_file_name" 
 1725             fs = po.get_fields(xl_keys+[rmf_file_key])
 
 1727             fs = po.get_fields(xl_keys)
 
 1730         self.cross_link_frequency = {}
 
 1734         self.cross_link_distances = {}
 
 1738         self.cross_link_distances_unique = {}
 
 1740         if not external_csv_data_file 
is None:
 
 1743             self.external_csv_data = {}
 
 1744             xldb = IMP.pmi1.tools.get_db_from_csv(external_csv_data_file)
 
 1747                 self.external_csv_data[
 
 1748                     xl[external_csv_data_file_unique_id_key]] = xl
 
 1753         cross_link_frequency_list = []
 
 1755         self.unique_cross_link_list = []
 
 1759             keysplit = key.replace(
 
 1768             if filter_label!=
None:
 
 1769                 if filter_label 
not in keysplit: 
continue 
 1772                 r1 = int(keysplit[5])
 
 1774                 r2 = int(keysplit[7])
 
 1777                     confidence = keysplit[12]
 
 1781                     unique_identifier = keysplit[3]
 
 1783                     unique_identifier = 
'0' 
 1785                 r1 = int(keysplit[mapping[
"Residue1"]])
 
 1786                 c1 = keysplit[mapping[
"Protein1"]]
 
 1787                 r2 = int(keysplit[mapping[
"Residue2"]])
 
 1788                 c2 = keysplit[mapping[
"Protein2"]]
 
 1790                     confidence = keysplit[mapping[
"Confidence"]]
 
 1794                     unique_identifier = keysplit[mapping[
"Unique Identifier"]]
 
 1796                     unique_identifier = 
'0' 
 1798             self.crosslinkedprots.add(c1)
 
 1799             self.crosslinkedprots.add(c2)
 
 1805             if filter_rmf_file_names 
is not None:
 
 1806                 for n,d 
in enumerate(fs[key]):
 
 1807                     if fs[rmf_file_key][n] 
in filter_rmf_file_names:
 
 1808                         dists.append(float(d))
 
 1810                 dists=[float(f) 
for f 
in fs[key]]
 
 1815             mdist = self.median(dists)
 
 1817             stdv = np.std(np.array(dists))
 
 1818             if self.mindist > mdist:
 
 1819                 self.mindist = mdist
 
 1820             if self.maxdist < mdist:
 
 1821                 self.maxdist = mdist
 
 1825             if not self.external_csv_data 
is None:
 
 1826                 sample = self.external_csv_data[unique_identifier][
"Sample"]
 
 1830             if (r1, c1, r2, c2,mdist) 
not in cross_link_frequency_list:
 
 1831                 if (r1, c1, r2, c2) 
not in self.cross_link_frequency:
 
 1832                     self.cross_link_frequency[(r1, c1, r2, c2)] = 1
 
 1833                     self.cross_link_frequency[(r2, c2, r1, c1)] = 1
 
 1835                     self.cross_link_frequency[(r2, c2, r1, c1)] += 1
 
 1836                     self.cross_link_frequency[(r1, c1, r2, c2)] += 1
 
 1837                 cross_link_frequency_list.append((r1, c1, r2, c2))
 
 1838                 cross_link_frequency_list.append((r2, c2, r1, c1))
 
 1839                 self.unique_cross_link_list.append(
 
 1840                     (r1, c1, r2, c2,mdist))
 
 1842             if (r1, c1, r2, c2) 
not in self.cross_link_distances:
 
 1843                 self.cross_link_distances[(
 
 1849                     confidence)] = dists
 
 1850                 self.cross_link_distances[(
 
 1856                     confidence)] = dists
 
 1857                 self.cross_link_distances_unique[(r1, c1, r2, c2)] = dists
 
 1859                 self.cross_link_distances[(
 
 1865                     confidence)] += dists
 
 1866                 self.cross_link_distances[(
 
 1872                     confidence)] += dists
 
 1874             self.crosslinks.append(
 
 1884             self.crosslinks.append(
 
 1895         self.cross_link_frequency_inverted = {}
 
 1896         for xl 
in self.unique_cross_link_list:
 
 1897             (r1, c1, r2, c2, mdist) = xl
 
 1898             frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
 
 1899             if frequency 
not in self.cross_link_frequency_inverted:
 
 1900                 self.cross_link_frequency_inverted[
 
 1901                     frequency] = [(r1, c1, r2, c2)]
 
 1903                 self.cross_link_frequency_inverted[
 
 1904                     frequency].append((r1, c1, r2, c2))
 
 1908     def median(self, mylist):
 
 1909         sorts = sorted(mylist)
 
 1915             return (sorts[length / 2] + sorts[length / 2 - 1]) / 2.0
 
 1916         return sorts[length / 2]
 
 1918     def set_threshold(self,threshold):
 
 1919         self.threshold=threshold
 
 1921     def set_tolerance(self,tolerance):
 
 1922         self.tolerance=tolerance
 
 1924     def colormap(self, dist):
 
 1925         if dist < self.threshold - self.tolerance:
 
 1927         elif dist >= self.threshold + self.tolerance:
 
 1932     def write_cross_link_database(self, filename, format='csv'):
 
 1936             "Unique ID", 
"Protein1", 
"Residue1", 
"Protein2", 
"Residue2",
 
 1937             "Median Distance", 
"Standard Deviation", 
"Confidence", 
"Frequency", 
"Arrangement"]
 
 1939         if not self.external_csv_data 
is None:
 
 1940             keys = list(self.external_csv_data.keys())
 
 1941             innerkeys = list(self.external_csv_data[keys[0]].keys())
 
 1943             fieldnames += innerkeys
 
 1945         dw = csv.DictWriter(
 
 1949             fieldnames=fieldnames)
 
 1951         for xl 
in self.crosslinks:
 
 1952             (r1, c1, r2, c2, mdist, stdv, confidence,
 
 1953              unique_identifier, descriptor) = xl
 
 1954             if descriptor == 
'original':
 
 1956                 outdict[
"Unique ID"] = unique_identifier
 
 1957                 outdict[
"Protein1"] = c1
 
 1958                 outdict[
"Protein2"] = c2
 
 1959                 outdict[
"Residue1"] = r1
 
 1960                 outdict[
"Residue2"] = r2
 
 1961                 outdict[
"Median Distance"] = mdist
 
 1962                 outdict[
"Standard Deviation"] = stdv
 
 1963                 outdict[
"Confidence"] = confidence
 
 1964                 outdict[
"Frequency"] = self.cross_link_frequency[
 
 1967                     arrangement = 
"Intra" 
 1969                     arrangement = 
"Inter" 
 1970                 outdict[
"Arrangement"] = arrangement
 
 1971                 if not self.external_csv_data 
is None:
 
 1972                     outdict.update(self.external_csv_data[unique_identifier])
 
 1974                 dw.writerow(outdict)
 
 1976     def plot(self, prot_listx=None, prot_listy=None, no_dist_info=False,
 
 1977              no_confidence_info=
False, filter=
None, layout=
"whole", crosslinkedonly=
False,
 
 1978              filename=
None, confidence_classes=
None, alphablend=0.1, scale_symbol_size=1.0,
 
 1979              gap_between_components=0,
 
 1980              rename_protein_map=
None):
 
 1994         import matplotlib 
as mpl
 
 1996         import matplotlib.pyplot 
as plt
 
 1997         import matplotlib.cm 
as cm
 
 1999         fig = plt.figure(figsize=(10, 10))
 
 2000         ax = fig.add_subplot(111)
 
 2006         if prot_listx 
is None:
 
 2008                 prot_listx = list(self.crosslinkedprots)
 
 2010                 prot_listx = list(self.prot_length_dict.keys())
 
 2013         nresx = gap_between_components + \
 
 2014             sum([self.prot_length_dict[name]
 
 2015                 + gap_between_components 
for name 
in prot_listx])
 
 2019         if prot_listy 
is None:
 
 2021                 prot_listy = list(self.crosslinkedprots)
 
 2023                 prot_listy = list(self.prot_length_dict.keys())
 
 2026         nresy = gap_between_components + \
 
 2027             sum([self.prot_length_dict[name]
 
 2028                 + gap_between_components 
for name 
in prot_listy])
 
 2033         res = gap_between_components
 
 2034         for prot 
in prot_listx:
 
 2035             resoffsetx[prot] = res
 
 2036             res += self.prot_length_dict[prot]
 
 2038             res += gap_between_components
 
 2042         res = gap_between_components
 
 2043         for prot 
in prot_listy:
 
 2044             resoffsety[prot] = res
 
 2045             res += self.prot_length_dict[prot]
 
 2047             res += gap_between_components
 
 2049         resoffsetdiagonal = {}
 
 2050         res = gap_between_components
 
 2051         for prot 
in IMP.pmi1.tools.OrderedSet(prot_listx + prot_listy):
 
 2052             resoffsetdiagonal[prot] = res
 
 2053             res += self.prot_length_dict[prot]
 
 2054             res += gap_between_components
 
 2060         for n, prot 
in enumerate(prot_listx):
 
 2061             res = resoffsetx[prot]
 
 2063             for proty 
in prot_listy:
 
 2064                 resy = resoffsety[proty]
 
 2065                 endy = resendy[proty]
 
 2066                 ax.plot([res, res], [resy, endy], 
'k-', lw=0.4)
 
 2067                 ax.plot([end, end], [resy, endy], 
'k-', lw=0.4)
 
 2068             xticks.append((float(res) + float(end)) / 2)
 
 2069             if rename_protein_map 
is not None:
 
 2070                 if prot 
in rename_protein_map:
 
 2071                     prot=rename_protein_map[prot]
 
 2072             xlabels.append(prot)
 
 2076         for n, prot 
in enumerate(prot_listy):
 
 2077             res = resoffsety[prot]
 
 2079             for protx 
in prot_listx:
 
 2080                 resx = resoffsetx[protx]
 
 2081                 endx = resendx[protx]
 
 2082                 ax.plot([resx, endx], [res, res], 
'k-', lw=0.4)
 
 2083                 ax.plot([resx, endx], [end, end], 
'k-', lw=0.4)
 
 2084             yticks.append((float(res) + float(end)) / 2)
 
 2085             if rename_protein_map 
is not None:
 
 2086                 if prot 
in rename_protein_map:
 
 2087                     prot=rename_protein_map[prot]
 
 2088             ylabels.append(prot)
 
 2091         print(prot_listx, prot_listy)
 
 2093         if not self.contactmap 
is None:
 
 2094             import matplotlib.cm 
as cm
 
 2095             tmp_array = np.zeros((nresx, nresy))
 
 2097             for px 
in prot_listx:
 
 2099                 for py 
in prot_listy:
 
 2101                     resx = resoffsety[px]
 
 2102                     lengx = resendx[px] - 1
 
 2103                     resy = resoffsety[py]
 
 2104                     lengy = resendy[py] - 1
 
 2105                     indexes_x = self.index_dictionary[px]
 
 2106                     minx = min(indexes_x)
 
 2107                     maxx = max(indexes_x)
 
 2108                     indexes_y = self.index_dictionary[py]
 
 2109                     miny = min(indexes_y)
 
 2110                     maxy = max(indexes_y)
 
 2112                     print(px, py, minx, maxx, miny, maxy)
 
 2117                             resy:lengy] = self.contactmap[
 
 2124             ax.imshow(tmp_array,
 
 2127                       interpolation=
'nearest')
 
 2129         ax.set_xticks(xticks)
 
 2130         ax.set_xticklabels(xlabels, rotation=90)
 
 2131         ax.set_yticks(yticks)
 
 2132         ax.set_yticklabels(ylabels)
 
 2133         ax.set_xlim(0,nresx)
 
 2134         ax.set_ylim(0,nresy)
 
 2139         already_added_xls = []
 
 2141         for xl 
in self.crosslinks:
 
 2143             (r1, c1, r2, c2, mdist, stdv, confidence,
 
 2144              unique_identifier, descriptor) = xl
 
 2146             if confidence_classes 
is not None:
 
 2147                 if confidence 
not in confidence_classes:
 
 2151                 pos1 = r1 + resoffsetx[c1]
 
 2155                 pos2 = r2 + resoffsety[c2]
 
 2159             if not filter 
is None:
 
 2160                 xldb = self.external_csv_data[unique_identifier]
 
 2161                 xldb.update({
"Distance": mdist})
 
 2162                 xldb.update({
"Distance_stdv": stdv})
 
 2164                 if filter[1] == 
">":
 
 2165                     if float(xldb[filter[0]]) <= float(filter[2]):
 
 2168                 if filter[1] == 
"<":
 
 2169                     if float(xldb[filter[0]]) >= float(filter[2]):
 
 2172                 if filter[1] == 
"==":
 
 2173                     if float(xldb[filter[0]]) != float(filter[2]):
 
 2179             pos_for_diagonal1 = r1 + resoffsetdiagonal[c1]
 
 2180             pos_for_diagonal2 = r2 + resoffsetdiagonal[c2]
 
 2182             if layout == 
'lowerdiagonal':
 
 2183                 if pos_for_diagonal1 <= pos_for_diagonal2:
 
 2185             if layout == 
'upperdiagonal':
 
 2186                 if pos_for_diagonal1 >= pos_for_diagonal2:
 
 2189             already_added_xls.append((r1, c1, r2, c2))
 
 2191             if not no_confidence_info:
 
 2192                 if confidence == 
'0.01':
 
 2193                     markersize = 14 * scale_symbol_size
 
 2194                 elif confidence == 
'0.05':
 
 2195                     markersize = 9 * scale_symbol_size
 
 2196                 elif confidence == 
'0.1':
 
 2197                     markersize = 6 * scale_symbol_size
 
 2199                     markersize = 15 * scale_symbol_size
 
 2201                 markersize = 5 * scale_symbol_size
 
 2203             if not no_dist_info:
 
 2204                 color = self.colormap(mdist)
 
 2214                 markersize=markersize)
 
 2218         fig.set_size_inches(0.004 * nresx, 0.004 * nresy)
 
 2220         [i.set_linewidth(2.0) 
for i 
in ax.spines.values()]
 
 2225             plt.savefig(filename + 
".pdf", dpi=300, transparent=
"False")
 
 2229     def get_frequency_statistics(self, prot_list,
 
 2232         violated_histogram = {}
 
 2233         satisfied_histogram = {}
 
 2234         unique_cross_links = []
 
 2236         for xl 
in self.unique_cross_link_list:
 
 2237             (r1, c1, r2, c2, mdist) = xl
 
 2240             if prot_list2 
is None:
 
 2241                 if not c1 
in prot_list:
 
 2243                 if not c2 
in prot_list:
 
 2246                 if c1 
in prot_list 
and c2 
in prot_list2:
 
 2248                 elif c1 
in prot_list2 
and c2 
in prot_list:
 
 2253             frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
 
 2255             if (r1, c1, r2, c2) 
not in unique_cross_links:
 
 2257                     if frequency 
not in violated_histogram:
 
 2258                         violated_histogram[frequency] = 1
 
 2260                         violated_histogram[frequency] += 1
 
 2262                     if frequency 
not in satisfied_histogram:
 
 2263                         satisfied_histogram[frequency] = 1
 
 2265                         satisfied_histogram[frequency] += 1
 
 2266                 unique_cross_links.append((r1, c1, r2, c2))
 
 2267                 unique_cross_links.append((r2, c2, r1, c1))
 
 2269         print(
"# satisfied")
 
 2271         total_number_of_crosslinks=0
 
 2273         for i 
in satisfied_histogram:
 
 2277             if i 
in violated_histogram:
 
 2278                 print(i, violated_histogram[i]+satisfied_histogram[i])
 
 2280                 print(i, satisfied_histogram[i])
 
 2281             total_number_of_crosslinks+=i*satisfied_histogram[i]
 
 2285         for i 
in violated_histogram:
 
 2286             print(i, violated_histogram[i])
 
 2287             total_number_of_crosslinks+=i*violated_histogram[i]
 
 2289         print(total_number_of_crosslinks)
 
 2293     def print_cross_link_binary_symbols(self, prot_list,
 
 2296         confidence_list = []
 
 2297         for xl 
in self.crosslinks:
 
 2298             (r1, c1, r2, c2, mdist, stdv, confidence,
 
 2299              unique_identifier, descriptor) = xl
 
 2301             if prot_list2 
is None:
 
 2302                 if not c1 
in prot_list:
 
 2304                 if not c2 
in prot_list:
 
 2307                 if c1 
in prot_list 
and c2 
in prot_list2:
 
 2309                 elif c1 
in prot_list2 
and c2 
in prot_list:
 
 2314             if descriptor != 
"original":
 
 2317             confidence_list.append(confidence)
 
 2319             dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
 
 2320             tmp_dist_binary = []
 
 2323                     tmp_dist_binary.append(1)
 
 2325                     tmp_dist_binary.append(0)
 
 2326             tmp_matrix.append(tmp_dist_binary)
 
 2328         matrix = list(zip(*tmp_matrix))
 
 2330         satisfied_high_sum = 0
 
 2331         satisfied_mid_sum = 0
 
 2332         satisfied_low_sum = 0
 
 2333         total_satisfied_sum = 0
 
 2334         for k, m 
in enumerate(matrix):
 
 2343             for n, b 
in enumerate(m):
 
 2344                 if confidence_list[n] == 
"0.01":
 
 2348                         satisfied_high_sum += 1
 
 2349                 elif confidence_list[n] == 
"0.05":
 
 2353                         satisfied_mid_sum += 1
 
 2354                 elif confidence_list[n] == 
"0.1":
 
 2358                         satisfied_low_sum += 1
 
 2360                     total_satisfied += 1
 
 2361                     total_satisfied_sum += 1
 
 2363             print(k, satisfied_high, total_high)
 
 2364             print(k, satisfied_mid, total_mid)
 
 2365             print(k, satisfied_low, total_low)
 
 2366             print(k, total_satisfied, total)
 
 2367         print(float(satisfied_high_sum) / len(matrix))
 
 2368         print(float(satisfied_mid_sum) / len(matrix))
 
 2369         print(float(satisfied_low_sum) / len(matrix))
 
 2372     def get_unique_crosslinks_statistics(self, prot_list,
 
 2385         satisfied_string = []
 
 2386         for xl 
in self.crosslinks:
 
 2387             (r1, c1, r2, c2, mdist, stdv, confidence,
 
 2388              unique_identifier, descriptor) = xl
 
 2390             if prot_list2 
is None:
 
 2391                 if not c1 
in prot_list:
 
 2393                 if not c2 
in prot_list:
 
 2396                 if c1 
in prot_list 
and c2 
in prot_list2:
 
 2398                 elif c1 
in prot_list2 
and c2 
in prot_list:
 
 2403             if descriptor != 
"original":
 
 2407             if confidence == 
"0.01":
 
 2411             if confidence == 
"0.05":
 
 2415             if confidence == 
"0.1":
 
 2420                 satisfied_string.append(1)
 
 2422                 satisfied_string.append(0)
 
 2424             dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
 
 2425             tmp_dist_binary = []
 
 2428                     tmp_dist_binary.append(1)
 
 2430                     tmp_dist_binary.append(0)
 
 2431             tmp_matrix.append(tmp_dist_binary)
 
 2433         print(
"unique satisfied_high/total_high", satisfied_high, 
"/", total_high)
 
 2434         print(
"unique satisfied_mid/total_mid", satisfied_mid, 
"/", total_mid)
 
 2435         print(
"unique satisfied_low/total_low", satisfied_low, 
"/", total_low)
 
 2436         print(
"total", total)
 
 2438         matrix = list(zip(*tmp_matrix))
 
 2439         satisfied_models = 0
 
 2441         for b 
in satisfied_string:
 
 2448             all_satisfied = 
True 
 2450             for n, b 
in enumerate(m):
 
 2455                 if b == 1 
and satisfied_string[n] == 1:
 
 2457                 elif b == 1 
and satisfied_string[n] == 0:
 
 2459                 elif b == 0 
and satisfied_string[n] == 0:
 
 2461                 elif b == 0 
and satisfied_string[n] == 1:
 
 2462                     all_satisfied = 
False 
 2464                 satisfied_models += 1
 
 2466             print(satstr, all_satisfied)
 
 2467         print(
"models that satisfies the median satisfied crosslinks/total models", satisfied_models, len(matrix))
 
 2469     def plot_matrix_cross_link_distances_unique(self, figurename, prot_list,
 
 2472         import matplotlib 
as mpl
 
 2474         import matplotlib.pylab 
as pl
 
 2477         for kw 
in self.cross_link_distances_unique:
 
 2478             (r1, c1, r2, c2) = kw
 
 2479             dists = self.cross_link_distances_unique[kw]
 
 2481             if prot_list2 
is None:
 
 2482                 if not c1 
in prot_list:
 
 2484                 if not c2 
in prot_list:
 
 2487                 if c1 
in prot_list 
and c2 
in prot_list2:
 
 2489                 elif c1 
in prot_list2 
and c2 
in prot_list:
 
 2494             dists.append(sum(dists))
 
 2495             tmp_matrix.append(dists)
 
 2497         tmp_matrix.sort(key=itemgetter(len(tmp_matrix[0]) - 1))
 
 2500         matrix = np.zeros((len(tmp_matrix), len(tmp_matrix[0]) - 1))
 
 2502         for i 
in range(len(tmp_matrix)):
 
 2503             for k 
in range(len(tmp_matrix[i]) - 1):
 
 2504                 matrix[i][k] = tmp_matrix[i][k]
 
 2509         ax = fig.add_subplot(211)
 
 2511         cax = ax.imshow(matrix, interpolation=
'nearest')
 
 2515         pl.savefig(figurename, dpi=300)
 
 2524         arrangement=
"inter",
 
 2525             confidence_input=
"None"):
 
 2528         for xl 
in self.cross_link_distances:
 
 2529             (r1, c1, r2, c2, mdist, confidence) = xl
 
 2530             if c1 
in prots1 
and c2 
in prots2:
 
 2531                 if arrangement == 
"inter" and c1 == c2:
 
 2533                 if arrangement == 
"intra" and c1 != c2:
 
 2535                 if confidence_input == confidence:
 
 2536                     label = str(c1) + 
":" + str(r1) + \
 
 2537                         "-" + str(c2) + 
":" + str(r2)
 
 2538                     values = self.cross_link_distances[xl]
 
 2539                     frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
 
 2540                     data.append((label, values, mdist, frequency))
 
 2542         sort_by_dist = sorted(data, key=
lambda tup: tup[2])
 
 2543         sort_by_dist = list(zip(*sort_by_dist))
 
 2544         values = sort_by_dist[1]
 
 2545         positions = list(range(len(values)))
 
 2546         labels = sort_by_dist[0]
 
 2547         frequencies = list(map(float, sort_by_dist[3]))
 
 2548         frequencies = [f * 10.0 
for f 
in frequencies]
 
 2550         nchunks = int(float(len(values)) / nxl_per_row)
 
 2551         values_chunks = IMP.pmi1.tools.chunk_list_into_segments(values, nchunks)
 
 2552         positions_chunks = IMP.pmi1.tools.chunk_list_into_segments(
 
 2555         frequencies_chunks = IMP.pmi1.tools.chunk_list_into_segments(
 
 2558         labels_chunks = IMP.pmi1.tools.chunk_list_into_segments(labels, nchunks)
 
 2560         for n, v 
in enumerate(values_chunks):
 
 2561             p = positions_chunks[n]
 
 2562             f = frequencies_chunks[n]
 
 2563             l = labels_chunks[n]
 
 2565                 filename + 
"." + str(n), v, p, f,
 
 2566                 valuename=
"Distance (Ang)", positionname=
"Unique " + arrangement + 
" Crosslinks", xlabels=l)
 
 2568     def crosslink_distance_histogram(self, filename,
 
 2571                                      confidence_classes=
None,
 
 2577         if prot_list 
is None:
 
 2578             prot_list = list(self.prot_length_dict.keys())
 
 2581         for xl 
in self.crosslinks:
 
 2582             (r1, c1, r2, c2, mdist, stdv, confidence,
 
 2583              unique_identifier, descriptor) = xl
 
 2585             if not confidence_classes 
is None:
 
 2586                 if confidence 
not in confidence_classes:
 
 2589             if prot_list2 
is None:
 
 2590                 if not c1 
in prot_list:
 
 2592                 if not c2 
in prot_list:
 
 2595                 if c1 
in prot_list 
and c2 
in prot_list2:
 
 2597                 elif c1 
in prot_list2 
and c2 
in prot_list:
 
 2602             distances.append(mdist)
 
 2605             filename, distances, valuename=
"C-alpha C-alpha distance [Ang]",
 
 2606             bins=bins, color=color,
 
 2608             reference_xline=35.0,
 
 2609             yplotrange=yplotrange, normalized=normalized)
 
 2611     def scatter_plot_xl_features(self, filename,
 
 2617                                  reference_ylines=
None,
 
 2618                                  distance_color=
True,
 
 2620         import matplotlib 
as mpl
 
 2622         import matplotlib.pyplot 
as plt
 
 2623         import matplotlib.cm 
as cm
 
 2625         fig = plt.figure(figsize=(10, 10))
 
 2626         ax = fig.add_subplot(111)
 
 2628         for xl 
in self.crosslinks:
 
 2629             (r1, c1, r2, c2, mdist, stdv, confidence,
 
 2630              unique_identifier, arrangement) = xl
 
 2632             if prot_list2 
is None:
 
 2633                 if not c1 
in prot_list:
 
 2635                 if not c2 
in prot_list:
 
 2638                 if c1 
in prot_list 
and c2 
in prot_list2:
 
 2640                 elif c1 
in prot_list2 
and c2 
in prot_list:
 
 2645             xldb = self.external_csv_data[unique_identifier]
 
 2646             xldb.update({
"Distance": mdist})
 
 2647             xldb.update({
"Distance_stdv": stdv})
 
 2649             xvalue = float(xldb[feature1])
 
 2650             yvalue = float(xldb[feature2])
 
 2653                 color = self.colormap(mdist)
 
 2657             ax.plot([xvalue], [yvalue], 
'o', c=color, alpha=0.1, markersize=7)
 
 2659         if not yplotrange 
is None:
 
 2660             ax.set_ylim(yplotrange)
 
 2661         if not reference_ylines 
is None:
 
 2662             for rl 
in reference_ylines:
 
 2663                 ax.axhline(rl, color=
'red', linestyle=
'dashed', linewidth=1)
 
 2666             plt.savefig(filename + 
"." + format, dpi=150, transparent=
"False")
 
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_moldict_coorddict
return data structure for the RMSD calculation 
A decorator to associate a particle with a part of a protein/DNA/RNA. 
double get_drms(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2)
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
Performs alignment and RMSD calculation for two sets of coordinates. 
Visualization of crosslinks. 
A class to cluster structures. 
double get_drmsd(const Vector3DsOrXYZs0 &m0, const Vector3DsOrXYZs1 &m1)
Calculate distance the root mean square deviation between two sets of 3D points. 
Legacy PMI1 module to represent, score, sample and analyze models. 
double get_weighted_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2, const Floats &weights)
def do_cluster
Run K-means clustering. 
void link_restraints(RMF::FileConstHandle fh, const Restraints &hs)
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name. 
def get_particles_at_resolution_ten
Get particles at res 10, or any beads, based on the name. 
Tools for clustering and cluster analysis. 
static Molecule setup_particle(Model *m, ParticleIndex pi)
def get_rmsd_wrt_reference_structure_with_alignment
First align then calculate RMSD. 
def plot_field_histogram
Plot a list of histograms from a value list. 
Class for sampling a density map from particles. 
A decorator for keeping track of copies of a molecule. 
def add_subunits_density
Add a frame to the densities. 
A class to evaluate the precision of an ensemble. 
DensityMap * multiply(const DensityMap *m1, const DensityMap *m2)
Return a density map for which voxel i contains the result of m1[i]*m2[i]. 
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD. 
def fill
Add coordinates for a single model. 
double get_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2)
def get_rmsf
Calculate the residue mean square fluctuations (RMSF). 
Basic utilities for handling cryo-electron microscopy 3D density maps. 
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects. 
A decorator for a particle with x,y,z coordinates. 
def get_density
Get the current density for some component name. 
def get_average_distance_wrt_reference_structure
Compare the structure set to the reference structure. 
def add_structure
Read a structure into the ensemble and store (as coordinates). 
def get_precision
Evaluate the precision of two named structure groups. 
def add_structures
Read a list of RMFs, supports parallel. 
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything. 
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
The general base class for IMP exceptions. 
Restraints create_restraints(RMF::FileConstHandle fh, Model *m)
A class for reading stat files (either rmf or ascii v1 and v2) 
Compute the RMSD (without alignment) taking into account the copy ambiguity. 
Compute mean density maps from structures. 
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index. 
def get_output
Returns output for IMP.pmi1.output.Output object. 
Classes for writing output files and processing them. 
def set_reference_structure
Read in a structure used for reference computation. 
Transformation3D get_transformation_aligning_first_to_second(Vector3Ds a, Vector3Ds b)
Hierarchies get_leaves(const Selection &h)
def plot_fields_box_plots
Plot time series as boxplots. 
double get_drmsd_Q(const Vector3DsOrXYZs0 &m0, const Vector3DsOrXYZs1 &m1, double threshold)
Select hierarchy particles identified by the biological name. 
Support for the RMF file format for storing hierarchical molecular data and markup. 
A decorator for a particle with x,y,z coordinates and a radius.