19 Clustering of pdb models.
20 This script clusters pdb models of an structure, chosen from a
22 - It is assumed that all the pdb files belong to the same structure
23 and that the order of the atoms in the pdb files is the same in all files
24 - After the clustering procedure, a linkage matrix is generated.
29 if sys.platform ==
'win32':
30 sys.stderr.write(
"this example does not currently work on Windows\n")
33 def get_columns(fn,cols=[],delimiter=" ",comment="#"):
34 """ ge the columns of a file:
35 cols - a list of columns to extract. E.g [0,3,5]
36 If empty, all the columns are extracted
37 lines starting with the comment character are ignored """
38 columns=[[]
for i
in cols]
40 reader=csv.reader(open(fn,
"r"),delimiter=delimiter,skipinitialspace=True)
42 if(row!=[]
and row[0][0]!=comment):
44 for i
in range(0,len(row)):
45 columns[i].append(row[i])
47 for i
in range(0,len(cols)):
48 columns[i].append(row[cols[i]])
53 """ Argmin function: Returns the pair (min_value,min_index),
54 where min_index is the index of the minimum value
56 min_value = sequence[0]
58 for i
in range(0,len(sequence)):
60 if(sequence[i]<min_value):
61 min_value = sequence[i]
64 return min_value,min_index
69 fn_selection = em2d.get_example_path(
"all-models-1z5s.sel")
70 fn_em2d_scores = em2d.get_example_path(
"em2d_scores_for_clustering.data")
72 print "Reading models ..."
74 ssel = atom.ATOMPDBSelector()
76 fn_models = em2d.read_selection_file(fn_selection)
77 n_models = len(fn_models)
80 fn_model=em2d.get_example_path(fn)
81 h=atom.read_pdb(fn_model,model,ssel,
True)
83 xyz=core.XYZs(atom.get_leaves(h))
84 coords.append( [x.get_coordinates()
for x
in xyz])
86 print "Computing matrix of RMSD ..."
87 rmsds=[[0.0
for i
in range(0,n_models)]
for n
in range(0,n_models)]
88 transformations=[[[]
for i
in range(0,n_models)]
for j
in range(0,n_models)]
90 for i
in xrange(0,n_models):
91 for j
in xrange(i+1,n_models):
96 transformations[i][j]=t
97 transformations[j][i]=t.get_inverse()
98 temp = [t.get_transformed(v)
for v
in coords[i]]
104 print "Clustering (Complete linkage method)..."
105 cluster_set = em2d.do_hierarchical_clustering_complete_linkage(rmsds)
106 mat2=cluster_set.get_linkage_matrix()
107 print "Complete Linkage Matrix"
112 em2d_scores = get_columns(fn_em2d_scores,[1])
113 em2d_scores = em2d_scores[0]
117 print "clusters below cutoff",rmsd_cutoff,
"Angstroms"
118 clusters=cluster_set.get_clusters_below_cutoff(rmsd_cutoff)
120 elems=cluster_set.get_cluster_elements(c)
123 scores_elements.append(em2d_scores[cid])
124 print "Cluster",c,
":",elems,scores_elements,
126 min_value,min_index= argmin(scores_elements)
127 min_elem_id=elems[min_index]
129 print "representative element",min_elem_id,min_value
131 pdb_name=
"cluster-%03d-elem-%03d.pdb" % (c,i)
134 print "Writing element",i,
"aligned to ",min_elem_id,
":",pdb_name
135 T=core.Transform(transformations[i][min_elem_id])
136 ps=atom.get_leaves(hierarchies[i])
140 print "Writing representative element",min_elem_id,
":",pdb_name
141 atom.write_pdb(hierarchies[i],pdb_name)