This example clusters pdb models of an structure, chosen from a selection file. It is assumed that all the pdb files belong to the same structure and that the order of the atoms in the pdb files is the same in all files.
After the clustering procedure, a linkage matrix is generated.
19 Clustering of pdb models.
20 This script clusters pdb models of an structure, chosen from a
22 - It is assumed that all the pdb files belong to the same structure
23 and that the order of the atoms in the pdb files is the same in all files
24 - After the clustering procedure, a linkage matrix is generated.
29 if sys.platform ==
'win32':
30 sys.stderr.write(
"this example does not currently work on Windows\n")
33 def get_columns(fn,cols=[],delimiter=" ",comment="#"):
34 """ ge the columns of a file:
35 cols - a list of columns to extract. E.g [0,3,5]
36 If empty, all the columns are extracted
37 lines starting with the comment character are ignored """
38 columns=[[]
for i
in cols]
40 reader=csv.reader(open(fn,
"r"),delimiter=delimiter,skipinitialspace=True)
42 if(row!=[]
and row[0][0]!=comment):
44 for i
in range(0,len(row)):
45 columns[i].append(row[i])
47 for i
in range(0,len(cols)):
48 columns[i].append(row[cols[i]])
53 """ Argmin function: Returns the pair (min_value,min_index),
54 where min_index is the index of the minimum value
56 min_value = sequence[0]
58 for i
in range(0,len(sequence)):
60 if(sequence[i]<min_value):
61 min_value = sequence[i]
64 return min_value,min_index
69 fn_selection = em2d.get_example_path(
"all-models-1z5s.sel")
70 fn_em2d_scores = em2d.get_example_path(
"em2d_scores_for_clustering.data")
72 print "Reading models ..."
74 ssel = atom.ATOMPDBSelector()
76 fn_models = em2d.read_selection_file(fn_selection)
77 n_models = len(fn_models)
80 fn_model=em2d.get_example_path(fn)
81 h=atom.read_pdb(fn_model,model,ssel,
True)
83 xyz=core.XYZs(atom.get_leaves(h))
84 coords.append( [x.get_coordinates()
for x
in xyz])
86 print "Computing matrix of RMSD ..."
87 rmsds=[[0.0
for i
in range(0,n_models)]
for n
in range(0,n_models)]
88 transformations=[[[]
for i
in range(0,n_models)]
for j
in range(0,n_models)]
90 for i
in xrange(0,n_models):
91 for j
in xrange(i+1,n_models):
96 transformations[i][j]=t
97 transformations[j][i]=t.get_inverse()
98 temp = [t.get_transformed(v)
for v
in coords[i]]
104 print "Clustering (Complete linkage method)..."
105 cluster_set = em2d.do_hierarchical_clustering_complete_linkage(rmsds)
106 mat2=cluster_set.get_linkage_matrix()
107 print "Complete Linkage Matrix"
112 em2d_scores = get_columns(fn_em2d_scores,[1])
113 em2d_scores = em2d_scores[0]
117 print "clusters below cutoff",rmsd_cutoff,
"Angstroms"
118 clusters=cluster_set.get_clusters_below_cutoff(rmsd_cutoff)
120 elems=cluster_set.get_cluster_elements(c)
123 scores_elements.append(em2d_scores[cid])
124 print "Cluster",c,
":",elems,scores_elements,
126 min_value,min_index= argmin(scores_elements)
127 min_elem_id=elems[min_index]
129 print "representative element",min_elem_id,min_value
131 pdb_name=
"cluster-%03d-elem-%03d.pdb" % (c,i)
134 print "Writing element",i,
"aligned to ",min_elem_id,
":",pdb_name
135 T=core.Transform(transformations[i][min_elem_id])
136 ps=atom.get_leaves(hierarchies[i])
140 print "Writing representative element",min_elem_id,
":",pdb_name
141 atom.write_pdb(hierarchies[i],pdb_name)