3 from __future__
import print_function
6 from IMP
import ArgumentParser
# Command-line interface for the clustering script: one optional cap on the
# number of combinations, followed by seven required positional arguments
# (consumed in the order they are declared here).
desc = "A script for clustering an ensemble of solutions"
p = ArgumentParser(description=desc)
# Optional limit; the large default effectively means "no limit".
p.add_argument("-m", "--max", type=int, dest="max", default=999999999,
               help="maximum number of combinations to consider")
p.add_argument("assembly_file", help="assembly file name")
p.add_argument("proteomics_file", help="proteomics file name")
p.add_argument("mapping_file", help="mapping file name")
p.add_argument("param_file", help="parameter file name")
p.add_argument("combinations_file", help="combinations file name")
# Parsed as a float so it can be used directly as a numeric diameter.
p.add_argument("diameter", type=float, help="cluster diameter")
p.add_argument("cluster_file", help="output clusters file name")
26 def run(asmb_fn, proteomics_fn, mapping_fn, align_param_fn,
27 comb_fn, diameter, output_comb_fn, max_combs):
31 prot_data, mapping_fn)
32 alignment_params = IMP.multifit.AlignmentParams(align_param_fn)
36 mapping_data, asmb_data, alignment_params)
38 mhs = align.get_molecules()
40 for i, mh
in enumerate(mhs):
41 ensb.add_component_and_fits(
44 asmb_data.get_component_header(i).get_transformations_fn()))
48 print(
"NUMBER OF COMPS:", asmb_data.get_number_of_component_headers())
49 for i
in range(asmb_data.get_number_of_component_headers()):
53 mh_paths = mapping_data.get_paths_for_protein(
54 prot_data.get_protein_name(i))
56 for j
in range(asmb_data.get_number_of_component_headers()):
58 for j
in range(len(mh_paths)):
60 ensb.load_combination(dummy_comb)
63 ensb.unload_combination(dummy_comb)
64 mol_path_centers.append(mol_centers)
65 for i, p
in enumerate(mol_path_centers):
66 print(
"number of paths for mol:", i,
"is", len(p))
70 for comb
in combs[:max_combs]:
72 for i
in range(len(mhs)):
73 mh_c += mol_path_centers[i][comb[i]]
78 print(
"number of clusters:", bin_cluster.get_number_of_clusters())
80 for k
in range(bin_cluster.get_number_of_clusters()):
81 bc = bin_cluster.get_cluster(k)
82 cluster_stat.append([len(bc), k, bc])
83 cluster_stat = sorted(
85 key=operator.itemgetter(0),
88 for ind, [cluster_size, cluster_ind,
89 cluster_elems]
in enumerate(cluster_stat):
90 print(
"cluster index:", ind,
"with", cluster_size,
"combinations")
91 cluster_reps.append(combs[cluster_elems[0]])
92 print(
"============clustering============")
93 print(
"Number of clusters found " + str(len(cluster_reps)))
94 print(
"==================================")
98 if __name__ ==
"__main__":
100 run(args.assembly_file, args.proteomics_file, args.mapping_file,
101 args.param_file, args.combinations_file, args.diameter,
102 args.cluster_file, args.max)
An ensemble of fitting solutions.
void write_paths(const IntsList &paths, const std::string &txt_filename)
algebra::Vector3D get_centroid(const XYZs &ps)
Get the centroid.
SettingsData * read_settings(const char *filename)
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the part of the hierarchy rooted at mhd.
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
Align proteomics graph to EM density map.
PartitionalClusteringWithCenter * create_bin_based_clustering(Embedding *embed, double side)
Fitting atomic structures into a cryo-electron microscopy density map.
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
IntsList read_paths(const char *txt_filename, int max_paths=INT_MAX)
Read paths.
FittingSolutionRecords read_fitting_solutions(const char *fitting_fn)
Fitting solutions reader.
Simply return the coordinates of a VectorD.