6 import IMP.em2d.imp_general.io
as io
7 import IMP.em2d.imp_general.representation
as representation
# Module-level logger named "cluster_solutions"; the methods of
# AlignmentClustering emit their debug/info messages through it.
21 log = logging.getLogger(
"cluster_solutions")
class AlignmentClustering:
    """
        Clusters solutions present in a database.
        - The solutions are chosen by sorting the database according to the
          measure passed as orderby to cluster()
        - The models are aligned and clustered by RMSD

        Typical use: cluster() (or do_clustering() directly) followed by
        store_clusters().
    """

    def __init__(self, exp):
        """
            @param exp an Experiment class containing the names of the pdb
                       files; do_clustering() reads them as exp.fn_pdbs
        """
        # NOTE(review): this assignment is not visible in the listing the
        # class was recovered from; it is inferred from do_clustering()
        # reading self.exp.fn_pdbs.
        self.exp = exp

    def cluster(self, fn_database, n_solutions, orderby, max_rmsd):
        """
            Read solutions from a results database and cluster them.

            The ids of the solutions read are kept in self.solution_ids and
            the clustering itself is delegated to do_clustering().

            @param fn_database Database of results
            @param n_solutions Number of solutions to use for clustering
            @param orderby Measure used to order solutions
            @param max_rmsd See do_clustering()
        """
        log.debug("Call to cluster()")
        db = solutions_io.ResultsDB()
        db.connect(fn_database)
        fields = ["reference_frames", "solution_id"]
        data = db.get_solutions(fields, n_solutions, orderby)
        # NOTE(review): the listing skips one source line here; presumably
        # the database connection is released at this point - confirm.
        self.solution_ids = [row[1] for row in data]
        # Column 0 of every row holds the reference frames of one solution
        # as a single "/"-separated text field.
        # NOTE(review): the accumulator and the loop header are not visible
        # in the listing; they are inferred from the use of `row` and from
        # confs_RFs being handed to do_clustering().
        confs_RFs = []
        for row in data:
            rs = row[0].split("/")
            RFs = [io.TextToReferenceFrame(r).get_reference_frame()
                   for r in rs]
            confs_RFs.append(RFs)
        self.do_clustering(confs_RFs, max_rmsd)

    def do_clustering(self, confs_RFs, max_rmsd):
        """
            Cluster configurations for a model based on RMSD.
            An IMP.ConfigurationSet is built using the reference frames of
            the components of the assembly for each solution.
            The resulting clustering is stored in self.pclus.

            @param confs_RFs A list with one entry per solution. Each entry
                             is a sequence of reference frames, one for the
                             rigid body of each component of the assembly
            @param max_rmsd Maximum RMSD tolerated when clustering
        """
        # Imported locally because no module-level `import IMP` is visible
        # in the recovered listing.
        import IMP

        # NOTE(review): the creation of `model` and `configuration_set` and
        # the loop header below are missing from the listing; they are
        # inferred from how the names are used and from the docstring.
        model = IMP.Model()
        assembly = representation.create_assembly(model, self.exp.fn_pdbs)
        rbs = representation.create_rigid_bodies(assembly)
        configuration_set = IMP.ConfigurationSet(model)
        for RFs in confs_RFs:
            # Place the rigid bodies as in this solution and snapshot it.
            representation.set_reference_frames(rbs, RFs)
            configuration_set.save_configuration()
        particles_container = container.ListSingletonContainer(model)
        particles_container.add_particles(atom.get_leaves(assembly))
        metric = stats.ConfigurationSetRMSDMetric(
            configuration_set, particles_container, True)
        log.info("Clustering ... ")
        maximum_centrality = 10
        # NOTE(review): the last argument is cut off in the listing;
        # maximum_centrality is the only otherwise-unused local - confirm.
        self.pclus = stats.create_centrality_clustering(
            metric, max_rmsd, maximum_centrality)
        n = self.pclus.get_number_of_clusters()
        log.info("Number of clusters found: %s", n)

    def store_clusters(self, fn_database, tbl="clusters"):
        """
            Store the clusters in the database.
            The database does not necessarily have to be the same database
            used to read the solutions; it is created if the file does not
            exist.

            @param fn_database Database where the clusters are written
            @param tbl Table of the database where the clusters are written
            @raise ValueError if no clustering has been performed yet
        """
        if not hasattr(self, "pclus"):
            raise ValueError("Clustering not performed")
        db = solutions_io.ResultsDB()
        if not os.path.exists(fn_database):
            db.create(fn_database)
        db.connect(fn_database)
        db.add_clusters_table(tbl)
        n_clusters = self.pclus.get_number_of_clusters()
        for i in range(n_clusters):
            elements = self.pclus.get_cluster(i)
            r = self.pclus.get_cluster_representative(i)
            n_elements = len(elements)
            # Translate positions within the clustered set into the ids the
            # solutions have in the database they were read from.
            solution_ids = [self.solution_ids[k] for k in elements]
            # Both member lists are written as "|"-separated text.
            elements = "|".join(map(str, elements))
            solution_ids = "|".join(map(str, solution_ids))
            db.add_cluster_record(i, n_elements, r, elements, solution_ids)
        # NOTE(review): indentation is lost in the listing; this call is
        # assumed to run once, after all records were added - confirm.
        db.store_cluster_data()
# Script entry point: registers the command-line options, configures logging
# and, when a results database is given, clusters its solutions with
# AlignmentClustering and writes the clusters to the output database.
# NOTE(review): this section comes from a listing that omits source lines (the
# embedded line numbers jump, e.g. 119 -> 123). The creation of `parser` and
# several keyword arguments of the add_option() calls are not visible, so the
# text below is kept exactly as found rather than reconstructed.
119 if __name__ ==
"__main__":
123 "Clusters the best solutions contained in the database, and writes a "
124 " new table in the database containing the clusters ids and members")
125 parser.add_option(
"--exp",
128 help=
"File describing an experiment ")
129 parser.add_option(
"--db",
131 help=
"Database of results")
132 parser.add_option(
"--o",
134 default=
"clusters.db",
135 help=
"Database file to store the clusters obtained. It "\
136 "can be the same one containing the solutions")
137 parser.add_option(
"--n",
141 help=
"Number of solutions to cluster")
# NOTE(review): the help text of --orderby reads "Sor" (likely "Sort") and its
# continuation line is missing from this listing.
142 parser.add_option(
"--orderby",
145 help=
"Sor the solutions according to this measure before "\
147 parser.add_option(
"--log",
150 help=
"File for logging")
151 parser.add_option(
"--rmsd",
155 help=
"Maximum rmsd centroids to define clusters")
# NOTE(review): `args` is used below through attributes (args.log,
# args.fn_database, ...). If `parser` is an optparse.OptionParser,
# parse_args() returns an (options, args) pair - confirm that a line hidden
# from this listing unpacks it.
157 args = parser.parse_args()
159 if(len(sys.argv) == 1):
# Logging goes to the file args.log (opened with mode "w") or to stdout; the
# condition choosing between the two calls is not visible here. The root
# logger level is set to DEBUG.
163 logging.basicConfig(filename=args.log, filemode=
"w")
165 logging.basicConfig(stream=sys.stdout)
166 logging.root.setLevel(logging.DEBUG)
# Clustering runs only when args.fn_database is set; in that case both
# args.n_solutions and args.orderby must be truthy or ValueError is raised.
168 if(args.fn_database):
169 if(
not args.n_solutions
or not args.orderby):
170 raise ValueError(
"parameters --n and --orderby required")
171 exp = utility.get_experiment_params(args.experiment)
172 tc = AlignmentClustering(exp)
# NOTE(review): the final argument of this call is on a line missing from the
# listing.
173 tc.cluster(args.fn_database, args.n_solutions, args.orderby,
175 tc.store_clusters(args.fn_output_db,
"clusters")