This example clusters pdb models of an structure, chosen from a selection file. It is assumed that all the pdb files belong to the same structure and that the order of the atoms in the pdb files is the same in all files.
After the clustering procedure, a linkage matrix is generated.
11 from __future__
import print_function
24 Clustering of pdb models.
25 This script clusters pdb models of an structure, chosen from a
27 - It is assumed that all the pdb files belong to the same structure
28 and that the order of the atoms in the pdb files is the same in all files
29 - After the clustering procedure, a linkage matrix is generated.
34 if sys.platform ==
'win32':
35 sys.stderr.write(
"this example does not currently work on Windows\n")
39 def get_columns(fn, cols=[], delimiter=" ", comment="#"):
40 """ ge the columns of a file:
41 cols - a list of columns to extract. E.g [0,3,5]
42 If empty, all the columns are extracted
43 lines starting with the comment character are ignored """
44 columns = [[]
for i
in cols]
47 open(fn,
"r"), delimiter=delimiter, skipinitialspace=True)
49 if(row != []
and row[0][0] != comment):
51 for i
in range(0, len(row)):
52 columns[i].append(row[i])
54 for i
in range(0, len(cols)):
55 columns[i].append(row[cols[i]])
60 """ Argmin function: Returns the pair (min_value,min_index),
61 where min_index is the index of the minimum value
63 min_value = sequence[0]
65 for i
in range(0, len(sequence)):
67 if(sequence[i] < min_value):
68 min_value = sequence[i]
71 return min_value, min_index
79 print(
"Reading models ...")
83 fn_models = IMP.em2d.read_selection_file(fn_selection)
84 n_models = len(fn_models)
91 coords.append([x.get_coordinates()
for x
in xyz])
93 print(
"Computing matrix of RMSD ...")
94 rmsds = [[0.0
for i
in range(0, n_models)]
for n
in range(0, n_models)]
95 transformations = [[[]
for i
in range(0, n_models)]
96 for j
in range(0, n_models)]
98 for i
in range(0, n_models):
99 for j
in range(i + 1, n_models):
104 transformations[i][j] = t
105 transformations[j][i] = t.get_inverse()
106 temp = [t.get_transformed(v)
for v
in coords[i]]
112 print(
"Clustering (Complete linkage method)...")
113 cluster_set = IMP.em2d.do_hierarchical_clustering_complete_linkage(rmsds)
114 mat2 = cluster_set.get_linkage_matrix()
115 print(
"Complete Linkage Matrix")
120 em2d_scores = get_columns(fn_em2d_scores, [1])
121 em2d_scores = em2d_scores[0]
125 print(
"clusters below cutoff", rmsd_cutoff,
"Angstroms")
126 clusters = cluster_set.get_clusters_below_cutoff(rmsd_cutoff)
128 elems = cluster_set.get_cluster_elements(c)
131 scores_elements.append(em2d_scores[cid])
132 print(
"Cluster", c,
":", elems, scores_elements, end=
' ')
134 min_value, min_index = argmin(scores_elements)
135 min_elem_id = elems[min_index]
137 print(
"representative element", min_elem_id, min_value)
139 pdb_name =
"cluster-%03d-elem-%03d.pdb" % (c, i)
141 if(i != min_elem_id):
142 print(
"Writing element", i,
"aligned to ", min_elem_id,
":", pdb_name)
146 T.apply_index(model, p.get_particle_index())
148 print(
"Writing representative element", min_elem_id,
":", pdb_name)