This example clusters pdb models of an structure, chosen from a selection file. It is assumed that all the pdb files belong to the same structure and that the order of the atoms in the pdb files is the same in all files.
After the clustering procedure, a linkage matrix is generated.
11 from __future__
import print_function
23 Clustering of pdb models.
24 This script clusters pdb models of an structure, chosen from a
26 - It is assumed that all the pdb files belong to the same structure
27 and that the order of the atoms in the pdb files is the same in all files
28 - After the clustering procedure, a linkage matrix is generated.
33 if sys.platform ==
'win32':
34 sys.stderr.write(
"this example does not currently work on Windows\n")
38 def get_columns(fn, cols=[], delimiter=" ", comment="#"):
39 """ ge the columns of a file:
40 cols - a list of columns to extract. E.g [0,3,5]
41 If empty, all the columns are extracted
42 lines starting with the comment character are ignored """
43 columns = [[]
for i
in cols]
46 open(fn,
"r"), delimiter=delimiter, skipinitialspace=True)
48 if(row != []
and row[0][0] != comment):
50 for i
in range(0, len(row)):
51 columns[i].append(row[i])
53 for i
in range(0, len(cols)):
54 columns[i].append(row[cols[i]])
59 """ Argmin function: Returns the pair (min_value,min_index),
60 where min_index is the index of the minimum value
62 min_value = sequence[0]
64 for i
in range(0, len(sequence)):
65 if(sequence[i] < min_value):
66 min_value = sequence[i]
68 return min_value, min_index
74 print(
"Reading models ...")
78 fn_models = IMP.em2d.read_selection_file(fn_selection)
79 n_models = len(fn_models)
86 coords.append([x.get_coordinates()
for x
in xyz])
88 print(
"Computing matrix of RMSD ...")
89 rmsds = [[0.0
for i
in range(0, n_models)]
for n
in range(0, n_models)]
90 transformations = [[[]
for i
in range(0, n_models)]
91 for j
in range(0, n_models)]
93 for i
in range(0, n_models):
94 for j
in range(i + 1, n_models):
99 transformations[i][j] = t
100 transformations[j][i] = t.get_inverse()
101 temp = [t.get_transformed(v)
for v
in coords[i]]
107 print(
"Clustering (Complete linkage method)...")
108 cluster_set = IMP.em2d.do_hierarchical_clustering_complete_linkage(rmsds)
109 mat2 = cluster_set.get_linkage_matrix()
110 print(
"Complete Linkage Matrix")
115 em2d_scores = get_columns(fn_em2d_scores, [1])
116 em2d_scores = em2d_scores[0]
120 print(
"clusters below cutoff", rmsd_cutoff,
"Angstroms")
121 clusters = cluster_set.get_clusters_below_cutoff(rmsd_cutoff)
123 elems = cluster_set.get_cluster_elements(c)
126 scores_elements.append(em2d_scores[cid])
127 print(
"Cluster", c,
":", elems, scores_elements, end=
' ')
129 min_value, min_index = argmin(scores_elements)
130 min_elem_id = elems[min_index]
132 print(
"representative element", min_elem_id, min_value)
134 pdb_name =
"cluster-%03d-elem-%03d.pdb" % (c, i)
136 if(i != min_elem_id):
137 print(
"Writing element", i,
"aligned to ", min_elem_id,
":",
142 T.apply_index(model, p.get_particle_index())
144 print(
"Writing representative element", min_elem_id,
":", pdb_name)