6 from IMP
import OptionParser
10 usage =
"usage %prog [options] <asmb.input> <proteomics.input> <mapping.input> <alignment params> <combinatins> <diameter> <output combinations>\n"
11 usage +=
"A script for clustering an ensemble of solutions"
13 parser.add_option(
"-m",
"--max", type=
"int", dest=
"max", default=999999999,
14 help=
"maximum number of combinations to consider")
15 (options, args) = parser.parse_args()
17 parser.error(
"incorrect number of arguments")
18 return [options, args]
21 def run(asmb_fn, proteomics_fn, mapping_fn, align_param_fn,
22 comb_fn, diameter, output_comb_fn, max_combs):
26 prot_data, mapping_fn)
27 alignment_params = IMP.multifit.AlignmentParams(align_param_fn)
31 mapping_data, asmb_data, alignment_params)
32 mdl = align.get_model()
33 mhs = align.get_molecules()
35 for i, mh
in enumerate(mhs):
36 ensb.add_component_and_fits(mh,
41 print "NUMBER OF COMPS:", asmb_data.get_number_of_component_headers()
42 for i
in range(asmb_data.get_number_of_component_headers()):
46 mh_paths = mapping_data.get_paths_for_protein(
47 prot_data.get_protein_name(i))
49 for j
in range(asmb_data.get_number_of_component_headers()):
51 for j
in range(len(mh_paths)):
53 ensb.load_combination(dummy_comb)
56 ensb.unload_combination(dummy_comb)
57 mol_path_centers.append(mol_centers)
58 for i, p
in enumerate(mol_path_centers):
59 print "number of paths for mol:", i,
"is", len(p)
63 for comb
in combs[:max_combs]:
65 for i
in range(len(mhs)):
66 mh_c += mol_path_centers[i][comb[i]]
71 print "number of clusters:", bin_cluster.get_number_of_clusters()
73 for k
in range(bin_cluster.get_number_of_clusters()):
74 bc = bin_cluster.get_cluster(k)
75 cluster_stat.append([len(bc), k, bc])
76 cluster_stat = sorted(
78 key=operator.itemgetter(0),
81 for ind, [cluster_size, cluster_ind, cluster_elems]
in enumerate(cluster_stat):
82 print "cluster index:", ind,
"with", cluster_size,
"combinations"
83 cluster_reps.append(combs[cluster_elems[0]])
84 print "============clustering============"
85 print "Number of clusters found " + str(len(cluster_reps))
86 print "=================================="
89 if __name__ ==
"__main__":
90 options, args = parse_args()
92 run(args[0], args[1], args[2], args[3],
93 args[4], float(args[5]), args[6], options.max)
An ensemble of fitting solutions.
void write_paths(const IntsList &paths, const std::string &txt_filename)
algebra::Vector3D get_centroid(const XYZs &ps)
Get the centroid.
SettingsData * read_settings(const char *filename)
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
Align proteomics graph to EM density map.
PartitionalClusteringWithCenter * create_bin_based_clustering(Embedding *embed, double side)
See IMP.multifit for more information.
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
IMP::kernel::OptionParser OptionParser
IntsList read_paths(const char *txt_filename, int max_paths=INT_MAX)
Read paths.
FittingSolutionRecords read_fitting_solutions(const char *fitting_fn)
Fitting solutions reader.
Simply return the coordinates of a VectorD.