3 from IMP 
import ArgumentParser
 
    9 __doc__ = 
"Cluster assembly solutions." 
   18     num_keys = len(list(keys.keys()))
 
   19     for i 
in range(num_keys + 1):
 
   20         counter.append([0, i])
 
   22         counter[e][0] = counter[e][0] + 1
 
   23     counter = sorted(counter, reverse=
True)
 
   25     for c, i 
in counter[:-1]:
 
   32     def __init__(self, cluster_ind, cluster_size, rmsd_calculated):
 
   33         self.cluster_ind = cluster_ind
 
   34         self.cluster_size = cluster_size
 
   35         self.rmsd_calculated = rmsd_calculated
 
   37     def set_distance_stats(self, avg, std):
 
   38         self.distance_avg = avg
 
   39         self.distance_std = std
 
   41     def set_angle_stats(self, avg, std):
 
   45     def set_rmsd_stats(self, avg, std):
 
   49     def set_best_sampled_data(self, ind, rmsd, cc, distance, angle):
 
   50         self.best_sampled_ind = ind
 
   51         self.best_sampled_rmsd = rmsd
 
   52         self.best_sampled_cc = cc
 
   53         self.best_sampled_distance = distance
 
   54         self.best_sampled_angle = angle
 
   56     def set_best_scored_data(self, ind, rmsd, cc, distance, angle):
 
   57         self.best_scored_ind = ind
 
   58         self.best_scored_rmsd = rmsd
 
   59         self.best_scored_cc = cc
 
   60         self.best_scored_distance = distance
 
   61         self.best_scored_angle = angle
 
   67         Clusters assembly models 
   68         - The solutions are chosen by sorting the database according to the 
   70         - The models are aligned and clustered by RMSD 
   73     def __init__(self, asmb_fn, prot_fn, map_fn, align_fn, combs_fn):
 
   74         self.asmb_fn = asmb_fn
 
   75         self.prot_fn = prot_fn
 
   77         self.align_fn = align_fn
 
   78         self.combs_fn = combs_fn
 
   81         self.asmb.set_was_used(
True)
 
   83         self.alignment_params = IMP.multifit.AlignmentParams(self.align_fn)
 
   85             self.prot_data, self.map_fn)
 
   88             self.mapping_data, self.asmb,
 
   89             self.alignment_params)
 
   90         self.align.set_was_used(
True)
 
   94         self.ensmb.set_was_used(
True)
 
   95         self.mhs = self.align.get_molecules()
 
   96         for i, mh 
in enumerate(self.mhs):
 
   98                 self.asmb.get_component_header(i).get_transformations_fn()
 
   99             self.ensmb.add_component_and_fits(
 
  102         self.dmap = IMP.em.read_map(
 
  103             self.asmb.get_assembly_header().get_dens_fn())
 
  104         self.dmap.set_was_used(
True)
 
  105         self.dmap.get_header().set_resolution(
 
  107         _ = self.asmb.get_assembly_header().get_threshold()
 
  108         self.dmap.update_voxel_size(
 
  109             self.asmb.get_assembly_header().get_spacing())
 
  110         self.dmap.set_origin(self.asmb.get_assembly_header().get_origin())
 
  115             Cluster configurations for a model based on RMSD. 
  116             An IMP.ConfigurationSet is built using the reference frames for 
  117             all of the components of the assembly for each solution 
  118             @param max_comb_ind Maximum number of components to consider 
  119             @param max_rmsd Maximum RMSD tolerated when clustering 
  122         import scipy.cluster.hierarchy
 
  124         self.mdl = self.align.get_model()
 
  127             mh_res = IMP.atom.get_by_type(mh, IMP.atom.RESIDUE_TYPE)
 
  130             self.all_ca.append(s1.get_selected_particles())
 
  133         print(
"load configurations")
 
  134         for combi, comb 
in enumerate(self.combs[:max_comb_ind]):
 
  135             self.ensmb.load_combination(comb)
 
  137             for mol_ca 
in self.all_ca:
 
  142             self.coords.append(c1)
 
  143             self.ensmb.unload_combination(comb)
 
  145         print(
"calculate distances")
 
  146         for i 
in range(len(self.coords)):
 
  147             for j 
in range(i + 1, len(self.coords)):
 
  148                 self.distances.append(
 
  150                         list(itertools.chain.from_iterable(self.coords[i])),
 
  151                         list(itertools.chain.from_iterable(self.coords[j]))))
 
  153         Z = fastcluster.linkage(self.distances)
 
  154         self.cluster_inds = scipy.cluster.hierarchy.fcluster(
 
  155             Z, max_rmsd, criterion=
'distance')
 
  156         self.uniques = get_uniques(self.cluster_inds)
 
  157         print(
"number of clusters", len(self.uniques))
 
  163             self, model_coords, native_coords):
 
  165         Computes the position error (placement distance) and the orientation 
  166         error (placement angle) of the coordinates in model_coords with 
  167         respect to the coordinates in native_coords. 
  168         placement distance - translation between the centroids of the 
  170         placement angle - Angle in the axis-angle formulation of the rotation 
  171             aligning the two rigid bodies. 
  175         translation_vector = native_centroid - model_centroid
 
  176         distance = translation_vector.get_magnitude()
 
  177         if (len(model_coords) != len(native_coords)):
 
  179                 "Mismatch in the number of members %d %d " % (
 
  186         angle = P.second * 180. / math.pi
 
  187         return distance, angle
 
  191         bb_native = self.dmap.get_bounding_box() 
  192         bb_solution = IMP.core.get_bounding_box(IMP.core.XYZs(ps)) 
  193         # bounding box enclosing both the particles of the native assembly 
  194         #  and the particles of the model 
  195         bb_union = IMP.algebra.get_union(bb_native, bb_solution) 
  196         # add border of 4 voxels 
  197         border = 4*voxel_size 
  198         bottom = bb_union.get_corner(0) 
  199         bottom += IMP.algebra.Vector3D(-border, -border, -border) 
  200         top = bb_union.get_corner(1) 
  201         top += IMP.algebra.Vector3D(border, border, border) 
  202         bb_union = IMP.algebra.BoundingBox3D(bottom, top) 
  206         map_solution.set_particles(ps)
 
  207         map_solution.resample()
 
  208         map_solution.set_was_used(
True)
 
  210         map_solution.calcRMS()
 
  217                                                self.dmap, threshold)
 
  220     def get_cluster_representative_combination(self, query_cluster_ind):
 
  221         return self.combs[self.clusters_data[query_cluster_ind].cluster_ind]
 
  223     def get_cluster_stats(self, query_cluster_ind):
 
  224         return self.clusters_data[query_cluster_ind]
 
  226     def do_analysis(self, max_comb_ind):
 
  227         self.clusters_data = {}
 
  228         for cluster_ind 
in self.uniques:
 
  229             self.clusters_data[cluster_ind] = self.analyze_cluster(
 
  230                 cluster_ind, max_comb_ind)
 
  232     def analyze_cluster(self, query_cluster_ind, max_comb_ind):
 
  236         mhs_native_ca_ps = []
 
  238         for i 
in range(len(self.mhs)):
 
  239             if self.asmb.get_component_header(i).get_reference_fn() == 
"":
 
  244                     self.asmb.get_component_header(
 
  250             mhs_native_ca.append([])
 
  251             mhs_native_ca_ps.append([])
 
  252             for p 
in s1.get_selected_particles():
 
  253                 mhs_native_ca[-1].append(
IMP.core.XYZ(p).get_coordinates())
 
  259         for i 
in range(len(self.mhs)):
 
  262         best_sampled_ind = -1
 
  265         for elem_ind1, cluster_ind1 
in enumerate(self.cluster_inds):
 
  266             if cluster_ind1 != query_cluster_ind:
 
  268             counter = counter + 1
 
  272                         list(itertools.chain.from_iterable(mhs_native_ca)),
 
  273                         list(itertools.chain.from_iterable(
 
  274                             self.coords[elem_ind1]))))
 
  275             if best_scored_ind == -1:
 
  276                 self.ensmb.load_combination(self.combs[elem_ind1])
 
  277                 best_scored_ind = counter
 
  278                 best_scored_cc = self.
get_cc(
 
  279                     list(itertools.chain.from_iterable(self.all_ca)))
 
  281                     best_scored_rmsd = rmsds[-1]
 
  282                     best_sampled_ind = counter
 
  283                     best_sampled_cc = best_scored_cc
 
  284                     best_sampled_rmsd = rmsds[-1]
 
  285                 self.ensmb.unload_combination(self.combs[elem_ind1])
 
  288                 if rmsds[-1] < best_scored_rmsd:
 
  289                     self.ensmb.load_combination(self.combs[elem_ind1])
 
  290                     best_sampled_ind = counter
 
  291                     best_sampled_cc = self.
get_cc(
 
  292                         list(itertools.chain.from_iterable(self.all_ca)))
 
  293                     best_sampled_rmsd = rmsds[-1]
 
  294                     self.ensmb.unload_combination(self.combs[elem_ind1])
 
  297                 for i 
in range(len(self.mhs)):
 
  299                         self.coords[elem_ind1][i],
 
  303                 distances[i].append(sum_d / len(self.mhs))
 
  304                 angles[i].append(sum_a / len(self.mhs))
 
  305         cd = ClusterData(query_cluster_ind, counter + 1, calc_rmsd)
 
  308             d = numpy.array(list(itertools.chain.from_iterable(distances)))
 
  309             a = numpy.array(list(itertools.chain.from_iterable(angles)))
 
  310             r = numpy.array(rmsds)
 
  311             cd.set_distance_stats(d.mean(), d.std())
 
  312             cd.set_angle_stats(a.mean(), a.std())
 
  313             cd.set_best_scored_data(
 
  319             cd.set_rmsd_stats(r.mean(), r.std())
 
  320             cd.set_best_sampled_data(
 
  327             cd.set_best_scored_data(
 
  338 Clustering of assembly solutions. 
  340 This program uses the Python 'fastcluster' module, which can be obtained from 
  341 http://math.stanford.edu/~muellner/fastcluster.html 
  344     p.add_argument(
"-m", 
"--max", type=int, dest=
"max", default=999999999,
 
  345                    help=
"maximum solutions to consider")
 
  346     p.add_argument(
"-r", 
"--rmsd", type=float, dest=
"rmsd", default=5,
 
  347                    help=
"maximum rmsd within a cluster")
 
  348     p.add_argument(
"assembly_file", help=
"assembly file name")
 
  349     p.add_argument(
"proteomics_file", help=
"proteomics file name")
 
  350     p.add_argument(
"mapping_file", help=
"mapping file name")
 
  351     p.add_argument(
"param_file", help=
"parameter file name")
 
  352     p.add_argument(
"combinations_file", help=
"combinations file name")
 
  353     p.add_argument(
"cluster_file", help=
"output clusters file name")
 
  355     return p.parse_args()
 
  364         args.proteomics_file,
 
  367         args.combinations_file)
 
  368     _ = clust_engine.do_clustering(args.max, args.rmsd)
 
  369     print(
"clustering completed")
 
  370     print(
"start analysis")
 
  371     clust_engine.do_analysis(args.max)
 
  373     for cluster_ind 
in clust_engine.uniques:
 
  375             clust_engine.get_cluster_representative_combination(cluster_ind))
 
  378     for cluster_ind 
in clust_engine.uniques:
 
  379         info = clust_engine.get_cluster_stats(cluster_ind)
 
  381             clust_engine.get_cluster_representative_combination(cluster_ind))
 
  382         print(
"==========Cluster index:", info.cluster_ind, 
"size:",
 
  384         if info.rmsd_calculated:
 
  385             print(
"best sampled in cluster (index,cc,distance,angle,rmsd):",
 
  386                   info.best_sampled_ind, info.best_sampled_cc,
 
  387                   info.best_sampled_distance, info.best_sampled_angle,
 
  388                   info.best_sampled_rmsd)
 
  389         if info.rmsd_calculated:
 
  390             print(
"cluster representative (index,cc,distance,angle,rmsd):",
 
  391                   info.best_scored_ind, info.best_scored_cc,
 
  392                   info.best_scored_distance, info.best_scored_angle,
 
  393                   info.best_scored_rmsd)
 
  395             print(
"cluster representative (index,cc):", info.best_scored_ind,
 
  399 if __name__ == 
"__main__":
 
An ensemble of fitting solutions. 
 
double get_coarse_cc_coefficient(const DensityMap *grid1, const DensityMap *grid2, double grid2_voxel_data_threshold, bool allow_padding=false, FloatPair norm_factors=FloatPair(0., 0.))
Calculates the cross correlation coefficient between two maps. 
 
Various classes to hold sets of particles. 
 
Clusters assembly models. 
 
void write_paths(const IntsList &paths, const std::string &txt_filename)
 
SettingsData * read_settings(const char *filename)
 
void read_pdb(TextInput input, int model, Hierarchy h)
 
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
 
Align proteomics graph to EM density map. 
 
Class for sampling a density map from particles. 
 
double get_rmsd(const Selection &s0, const Selection &s1)
 
Fitting atomic structures into a cryo-electron microscopy density map. 
 
std::pair< Vector3D, double > get_axis_and_angle(const Rotation3D &rot)
Decompose a Rotation3D object into a rotation around an axis. 
 
A decorator for a particle with x,y,z coordinates. 
 
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader. 
 
def get_placement_score_from_coordinates
Computes the position error (placement distance) and the orientation error (placement angle) of the c...
 
def do_clustering
Cluster configurations for a model based on RMSD. 
 
IMP-specific subclass of argparse.ArgumentParser. 
 
Vector3D get_centroid(const Vector3Ds &ps)
Return the centroid of a set of vectors. 
 
void set_log_level(LogLevel l)
Set the current global log level. 
 
IntsList read_paths(const char *txt_filename, int max_paths=INT_MAX)
Read paths. 
 
FittingSolutionRecords read_fitting_solutions(const char *fitting_fn)
Fitting solutions reader. 
 
def get_cc
bb_native = self.dmap.get_bounding_box() bb_solution = IMP.core.get_bounding_box(IMP.core.XYZs(ps)) bounding box enclosing both the particles of the native assemblyand the particles of the modelbb_union = IMP.algebra.get_union(bb_native, bb_solution) add border of 4 voxelsborder = 4*voxel_size bottom = bb_union.get_corner(0) bottom += IMP.algebra.Vector3D(-border, -border, -border) top = bb_union.get_corner(1) top += IMP.algebra.Vector3D(border, border, border) bb_union = IMP.algebra.BoundingBox3D(bottom, top) 
 
double get_resolution(Model *m, ParticleIndex pi)
Estimate the resolution of the hierarchy as used by Representation. 
 
Transformation3D get_transformation_aligning_first_to_second(Vector3Ds a, Vector3Ds b)
 
Select hierarchy particles identified by the biological name.