3 __doc__ =
"Cluster assembly solutions."
5 from optparse
import OptionParser
17 num_keys=len(keys.keys())
18 for i
in range(num_keys+1):
21 counter[e][0]=counter[e][0]+1
22 counter=sorted(counter,reverse=
True)
24 for c,i
in counter[:-1]:
30 def __init__(self,cluster_ind,cluster_size,rmsd_calculated):
31 self.cluster_ind=cluster_ind
32 self.cluster_size=cluster_size
33 self.rmsd_calculated=rmsd_calculated
34 def set_distance_stats(self,avg,std):
37 def set_angle_stats(self,avg,std):
40 def set_rmsd_stats(self,avg,std):
43 def set_best_sampled_data(self,ind,rmsd,cc,distance,angle):
44 self.best_sampled_ind=ind
45 self.best_sampled_rmsd=rmsd
46 self.best_sampled_cc=cc
47 self.best_sampled_distance=distance
48 self.best_sampled_angle=angle
49 def set_best_scored_data(self,ind,rmsd,cc,distance,angle):
50 self.best_scored_ind=ind
51 self.best_scored_rmsd=rmsd
52 self.best_scored_cc=cc
53 self.best_scored_distance=distance
54 self.best_scored_angle=angle
58 Clusters assembly models
59 - The solutions are chosen by sorting the database according to the
61 - The models are aligned and clustered by RMSD
63 def __init__(self, asmb_fn, prot_fn, map_fn, align_fn, combs_fn):
64 self.asmb_fn = asmb_fn
65 self.prot_fn = prot_fn
67 self.align_fn = align_fn
68 self.combs_fn = combs_fn
71 self.asmb.set_was_used(
True)
73 self.alignment_params = IMP.multifit.AlignmentParams(self.align_fn)
76 self.align=IMP.multifit.ProteomicsEMAlignmentAtomic(self.mapping_data,self.asmb,
77 self.alignment_params)
78 self.align.set_was_used(
True)
81 self.ensmb=IMP.multifit.Ensemble(self.asmb,self.mapping_data)
82 self.ensmb.set_was_used(
True)
83 self.mhs=self.align.get_molecules()
84 for i,mh
in enumerate(self.mhs):
85 self.ensmb.add_component_and_fits(mh,
88 self.dmap=
IMP.em.read_map(self.asmb.get_assembly_header().get_dens_fn())
89 self.dmap.set_was_used(
True)
90 self.dmap.get_header().set_resolution(
91 self.asmb.get_assembly_header().get_resolution())
92 threshold=self.asmb.get_assembly_header().get_threshold()
93 self.dmap.update_voxel_size(self.asmb.get_assembly_header().get_spacing())
94 self.dmap.set_origin(self.asmb.get_assembly_header().get_origin())
98 Cluster configurations for a model based on RMSD.
99 An IMP.ConfigurationSet is built using the reference frames for
100 all of the components of the assembly for each solution
101 @param max_comb_ind Maximum number of components to consider
102 @param max_rmsd Maximum RMSD tolerated when clustering
105 import scipy.cluster.hierarchy
107 self.mdl = self.align.get_model()
113 self.all_ca.append(s1.get_selected_particles())
116 print "load configurations"
117 for combi,comb
in enumerate(self.combs[:max_comb_ind]):
118 self.ensmb.load_combination(comb)
120 for mol_ca
in self.all_ca:
125 self.coords.append(c1)
126 self.ensmb.unload_combination(comb)
128 print "calculate distances"
129 for i
in range(len(self.coords)):
130 for j
in range(i+1,len(self.coords)):
131 self.distances.append(
IMP.atom.get_rmsd(list(itertools.chain.from_iterable(self.coords[i])),
132 list(itertools.chain.from_iterable(self.coords[j]))))
134 Z=fastcluster.linkage(self.distances)
135 self.cluster_inds=scipy.cluster.hierarchy.fcluster(Z,max_rmsd,criterion=
'distance')
136 self.uniques=get_uniques(self.cluster_inds)
137 print "number of clusters",len(self.uniques)
145 Computes the position error (placement distance) and the orientation error (placement angle) of the coordinates in model_coords respect to the coordinates in native_coords.
146 placement distance - translation between the centroids of the
147 coordinates placement angle - Angle in the axis-angle formulation of the rotation
148 aligning the two rigid bodies.
152 translation_vector = native_centroid - model_centroid
153 distance = translation_vector.get_magnitude()
154 if(len(model_coords) != len(native_coords) ):
156 "Mismatch in the number of members %d %d " % (
158 len(native_coords)) )
162 angle = P.second*180./math.pi
163 return distance, angle
168 bb_native = self.dmap.get_bounding_box()
169 bb_solution = IMP.core.get_bounding_box(IMP.core.XYZs(ps))
170 # bounding box enclosing both the particles of the native assembly
171 # and the particles of the model
172 bb_union = IMP.algebra.get_union(bb_native, bb_solution)
173 # add border of 4 voxels
174 border = 4*voxel_size
175 bottom = bb_union.get_corner(0)
176 bottom += IMP.algebra.Vector3D(-border, -border, -border)
177 top = bb_union.get_corner(1)
178 top += IMP.algebra.Vector3D(border, border, border)
179 bb_union = IMP.algebra.BoundingBox3D(bottom, top)
182 resolution = self.dmap.get_header().get_resolution()
183 voxel_size = self.dmap.get_spacing()
186 map_solution.set_particles(ps)
187 map_solution.resample()
188 map_solution.set_was_used(
True)
190 map_solution.calcRMS()
192 coarse_cc.set_was_used(
True)
198 ccc = coarse_cc.cross_correlation_coefficient(map_solution,
199 self.dmap, threshold)
203 def get_cluster_representative_combination(self,query_cluster_ind):
204 return self.combs[self.clusters_data[query_cluster_ind].cluster_ind]
206 def get_cluster_stats(self,query_cluster_ind):
207 return self.clusters_data[query_cluster_ind]
209 def do_analysis(self,max_comb_ind):
210 self.clusters_data={}
211 for cluster_ind
in self.uniques:
212 self.clusters_data[cluster_ind]=self.analyze_cluster(cluster_ind,max_comb_ind)
214 def analyze_cluster(self,query_cluster_ind,max_comb_ind):
220 for i
in range(len(self.mhs)):
221 if self.asmb.get_component_header(i).get_reference_fn() ==
"":
224 mhs_native.append(
IMP.atom.read_pdb(self.asmb.get_component_header(i).get_reference_fn(),self.mdl))
227 mhs_native_ca.append([])
228 mhs_native_ca_ps.append([])
229 for p
in s1.get_selected_particles():
230 mhs_native_ca[-1].append(
IMP.core.XYZ(p).get_coordinates())
236 for i
in range(len(self.mhs)):
244 for elem_ind1,cluster_ind1
in enumerate(self.cluster_inds):
245 if cluster_ind1 != query_cluster_ind:
249 rmsds.append(
IMP.atom.get_rmsd(list(itertools.chain.from_iterable(mhs_native_ca)),
250 list(itertools.chain.from_iterable(self.coords[elem_ind1]))))
251 if best_scored_ind==-1:
252 self.ensmb.load_combination(self.combs[elem_ind1])
253 best_scored_ind=counter
254 best_scored_cc=self.
get_cc(
255 list(itertools.chain.from_iterable(self.all_ca)))
257 best_scored_rmsd = rmsds[-1]
258 best_sampled_ind=counter
259 best_sampled_cc=best_scored_cc
260 best_sampled_rmsd=rmsds[-1]
261 self.ensmb.unload_combination(self.combs[elem_ind1])
264 if rmsds[-1]<best_scored_rmsd:
265 self.ensmb.load_combination(self.combs[elem_ind1])
266 best_sampled_ind=counter
267 best_sampled_cc=self.
get_cc(
268 list(itertools.chain.from_iterable(self.all_ca)))
269 best_sampled_rmsd=rmsds[-1]
270 self.ensmb.unload_combination(self.combs[elem_ind1])
273 for i
in range(len(self.mhs)):
278 distances[i].append(sum_d/len(self.mhs))
279 angles[i].append(sum_a/len(self.mhs))
280 cd=ClusterData(query_cluster_ind,counter+1,calc_rmsd)
283 d = numpy.array(list(itertools.chain.from_iterable(distances)))
284 a = numpy.array(list(itertools.chain.from_iterable(angles)))
285 r = numpy.array(rmsds)
286 cd.set_distance_stats(d.mean(),d.std())
287 cd.set_angle_stats(a.mean(),a.std())
288 cd.set_best_scored_data(best_scored_ind, best_scored_rmsd,best_scored_cc,d[0],a[0])
289 cd.set_rmsd_stats(r.mean(),r.std())
290 cd.set_best_sampled_data(best_sampled_ind,best_sampled_rmsd,best_sampled_cc,d[best_sampled_ind],a[best_sampled_ind])
292 cd.set_best_scored_data(best_scored_ind,-1,best_scored_cc,-1,-1)
296 usage =
"""%prog [options] <asmb> <asmb.proteomics> <asmb.mapping>
297 <alignment.params> <combinations> <output: clustered combinations>
299 Clustering of assembly solutions.
301 This program uses the Python 'fastcluster' module, which can be obtained from
302 http://math.stanford.edu/~muellner/fastcluster.html
305 parser.add_option(
"-m",
"--max", type=
"int", dest=
"max", default=999999999,
306 help=
"maximum solutions to consider")
307 parser.add_option(
"-r",
"--rmsd", type=
"float", dest=
"rmsd", default=5,
308 help=
"maximum rmsd within a cluster")
309 options, args = parser.parse_args()
311 parser.error(
"incorrect number of arguments")
316 options,args = usage()
325 clusters=clust_engine.do_clustering(options.max,options.rmsd)
326 cluster_representatives=[]
327 print "clustering completed"
328 print "start analysis"
329 clust_engine.do_analysis(options.max)
331 for cluster_ind
in clust_engine.uniques:
332 repr_combs.append(clust_engine.get_cluster_representative_combination(cluster_ind))
335 for cluster_ind
in clust_engine.uniques:
336 info=clust_engine.get_cluster_stats(cluster_ind)
337 repr_combs.append(clust_engine.get_cluster_representative_combination(cluster_ind))
338 print "==========Cluster index:",info.cluster_ind,
"size:",info.cluster_size
339 if info.rmsd_calculated:
340 print "best sampled in cluster (index,cc,distance,angle,rmsd):",info.best_sampled_ind,info.best_sampled_cc,info.best_sampled_distance,info.best_sampled_angle,info.best_sampled_rmsd
341 if info.rmsd_calculated:
342 print "cluster representative (index,cc,distance,angle,rmsd):",info.best_scored_ind,info.best_scored_cc,info.best_scored_distance,info.best_scored_angle,info.best_scored_rmsd
344 print "cluster representative (index,cc):",info.best_scored_ind,info.best_scored_cc
347 if __name__ ==
"__main__":