This example clusters pdb models of an structure, chosen from a selection file. It is assumed that all the pdb files belong to the same structure and that the order of the atoms in the pdb files is the same in all files.
After the clustering procedure, a linkage matrix is generated.
11 from __future__
import print_function
21 Clustering of pdb models.
22 This script clusters pdb models of an structure, chosen from a
24 - It is assumed that all the pdb files belong to the same structure
25 and that the order of the atoms in the pdb files is the same in all files
26 - After the clustering procedure, a linkage matrix is generated.
31 if sys.platform ==
'win32':
32 sys.stderr.write(
"this example does not currently work on Windows\n")
36 def get_columns(fn, cols=[], delimiter=" ", comment="#"):
37 """ ge the columns of a file:
38 cols - a list of columns to extract. E.g [0,3,5]
39 If empty, all the columns are extracted
40 lines starting with the comment character are ignored """
41 columns = [[]
for i
in cols]
44 open(fn,
"r"), delimiter=delimiter, skipinitialspace=True)
46 if(row != []
and row[0][0] != comment):
48 for i
in range(0, len(row)):
49 columns[i].append(row[i])
51 for i
in range(0, len(cols)):
52 columns[i].append(row[cols[i]])
57 """ Argmin function: Returns the pair (min_value,min_index),
58 where min_index is the index of the minimum value
60 min_value = sequence[0]
62 for i
in range(0, len(sequence)):
64 if(sequence[i] < min_value):
65 min_value = sequence[i]
68 return min_value, min_index
76 print(
"Reading models ...")
80 fn_models = IMP.em2d.read_selection_file(fn_selection)
81 n_models = len(fn_models)
88 coords.append([x.get_coordinates()
for x
in xyz])
90 print(
"Computing matrix of RMSD ...")
91 rmsds = [[0.0
for i
in range(0, n_models)]
for n
in range(0, n_models)]
92 transformations = [[[]
for i
in range(0, n_models)]
93 for j
in range(0, n_models)]
95 for i
in range(0, n_models):
96 for j
in range(i + 1, n_models):
101 transformations[i][j] = t
102 transformations[j][i] = t.get_inverse()
103 temp = [t.get_transformed(v)
for v
in coords[i]]
109 print(
"Clustering (Complete linkage method)...")
110 cluster_set = IMP.em2d.do_hierarchical_clustering_complete_linkage(rmsds)
111 mat2 = cluster_set.get_linkage_matrix()
112 print(
"Complete Linkage Matrix")
117 em2d_scores = get_columns(fn_em2d_scores, [1])
118 em2d_scores = em2d_scores[0]
122 print(
"clusters below cutoff", rmsd_cutoff,
"Angstroms")
123 clusters = cluster_set.get_clusters_below_cutoff(rmsd_cutoff)
125 elems = cluster_set.get_cluster_elements(c)
128 scores_elements.append(em2d_scores[cid])
129 print(
"Cluster", c,
":", elems, scores_elements, end=
' ')
131 min_value, min_index = argmin(scores_elements)
132 min_elem_id = elems[min_index]
134 print(
"representative element", min_elem_id, min_value)
136 pdb_name =
"cluster-%03d-elem-%03d.pdb" % (c, i)
138 if(i != min_elem_id):
139 print(
"Writing element", i,
"aligned to ", min_elem_id,
":", pdb_name)
145 print(
"Writing representative element", min_elem_id,
":", pdb_name)