6 from optparse
import OptionParser
# NOTE(review): this listing carries the original file's line numbers inline
# and skips several of them here (8, 11, 15, 17), so the statements below are
# only part of the option-parsing code. Presumably the skipped lines hold the
# `def parse_args():` header, the creation of `parser`, the condition guarding
# parser.error() and the return value -- confirm against the complete file.
#
# Usage text: lists seven positional arguments, then a one-line description.
# NOTE(review): "combinatins" in the usage string looks like a typo for
# "combinations"; it is user-facing text, so it is left untouched here.
9 usage =
"usage %prog [options] <asmb.input> <proteomics.input> <mapping.input> <alignment params> <combinatins> <diameter> <output combinations>\n"
10 usage+=
"A script for clustering an ensemble of solutions"
# -m / --max: integer option stored as options.max (default 999999999),
# described in its help text as the maximum number of combinations to consider.
12 parser.add_option(
"-m",
"--max", type=
"int", dest=
"max", default=999999999,
13 help=
"maximum number of combinations to consider")
# Parse the command line into the option values and the positional arguments.
14 (options, args) = parser.parse_args()
# Report a usage error. The condition guarding this call is not visible in
# this listing (presumably a check on the number of positional arguments --
# TODO confirm).
16 parser.error(
"incorrect number of arguments")
# run: print the number of clusters held by `bin_cluster`, list the clusters
# from largest to smallest, and collect one representative combination per
# cluster.
#
# Parameters, as far as the visible lines show:
#   align_param_fn -- passed to IMP.multifit.AlignmentParams.
#   max_combs      -- only the first max_combs entries of `combs` are visited.
#   asmb_fn, proteomics_fn, mapping_fn, comb_fn, diameter, output_comb_fn --
#       not referenced by any visible line; their uses are presumably on
#       lines missing from this listing (TODO confirm).
#
# NOTE(review): the inline line numbers skip many lines (21-23, 25-26, 28,
# 33-36, 39-41, 43, 45, 47, 49-50, 55-57, 59, 62-65, 67, 72), so names such
# as asmb_data, prot_data, mapping_data, combs, bin_cluster, mol_centers,
# dummy_comb, mh_c, mol_path_centers, cluster_stat and cluster_reps are
# defined outside what is shown here.
# NOTE(review): the print statements use Python 2 syntax.
19 def run(asmb_fn,proteomics_fn,mapping_fn,align_param_fn,
20 comb_fn,diameter,output_comb_fn,max_combs):
# Build the alignment parameters object from align_param_fn.
24 alignment_params = IMP.multifit.AlignmentParams(align_param_fn)
# Construct the atomic proteomics/EM alignment object from the mapping data,
# the assembly data and the alignment parameters.
27 align=IMP.multifit.ProteomicsEMAlignmentAtomic(mapping_data,asmb_data,alignment_params)
# Fetch the molecules from the alignment object and create an Ensemble from
# the assembly and mapping data.
29 mhs=align.get_molecules()
30 ensb=IMP.multifit.Ensemble(asmb_data,mapping_data)
# Register every molecule with the ensemble. The remaining arguments of this
# call are on lines missing from the listing.
31 for i,mh
in enumerate(mhs):
32 ensb.add_component_and_fits(mh,
37 print "NUMBER OF COMPS:",asmb_data.get_number_of_component_headers()
# For each component: look up the paths mapped to its protein, then load and
# unload `dummy_comb` on the ensemble for each path index, and finally append
# `mol_centers` to mol_path_centers. How mol_centers is filled is not visible
# (presumably one center per path -- TODO confirm).
38 for i
in range(asmb_data.get_number_of_component_headers()):
42 mh_paths=mapping_data.get_paths_for_protein(prot_data.get_protein_name(i))
44 for j
in range(asmb_data.get_number_of_component_headers()):
46 for j
in range(len(mh_paths)):
48 ensb.load_combination(dummy_comb)
51 ensb.unload_combination(dummy_comb)
52 mol_path_centers.append(mol_centers)
# Report how many entries were collected for each molecule.
53 for i,p
in enumerate(mol_path_centers):
54 print "number of paths for mol:",i,
"is",len(p)
# Visit at most max_combs combinations; for each one, extend mh_c with the
# entry selected by comb[i] for every molecule i.
58 for comb
in combs[:max_combs]:
60 for i
in range(len(mhs)):
61 mh_c+=mol_path_centers[i][comb[i]]
66 print "number of clusters:",bin_cluster.get_number_of_clusters()
# Record [size, cluster index, members] for every cluster, then order the
# records by size, largest first.
68 for k
in range(bin_cluster.get_number_of_clusters()):
69 bc= bin_cluster.get_cluster(k)
70 cluster_stat.append([len(bc),k,bc])
71 cluster_stat=sorted(cluster_stat,key=operator.itemgetter(0),reverse=
True)
# Print each cluster's rank and size, and keep the combination indexed by the
# cluster's first member as that cluster's representative.
73 for ind,[cluster_size,cluster_ind,cluster_elems]
in enumerate(cluster_stat):
74 print "cluster index:",ind,
"with",cluster_size,
"combinations"
75 cluster_reps.append(combs[cluster_elems[0]])
# Summary banner with the number of representatives collected.
76 print "============clustering============"
77 print "Number of clusters found "+str(len(cluster_reps))
78 print "=================================="
# Script entry point: parse the command line, then call run().
# The first seven positional arguments are forwarded in order; args[5] (the
# sixth, matching run's `diameter` parameter) is converted to float and the
# others are passed through unchanged. options.max supplies run's max_combs.
# NOTE(review): the inline numbering skips line 83, so a line may be missing
# from this listing between the two statements below.
81 if __name__==
"__main__":
82 options,args = parse_args()
84 run(args[0],args[1],args[2],args[3],args[4],float(args[5]),args[6],options.max)