This example clusters pdb models of an structure, chosen from a selection file. It is assumed that all the pdb files belong to the same structure and that the order of the atoms in the pdb files is the same in all files.
After the clustering procedure, a linkage matrix is generated.
22 Clustering of pdb models.
23 This script clusters pdb models of an structure, chosen from a
25 - It is assumed that all the pdb files belong to the same structure
26 and that the order of the atoms in the pdb files is the same in all files
27 - After the clustering procedure, a linkage matrix is generated.
32 if sys.platform ==
'win32':
33 sys.stderr.write(
"this example does not currently work on Windows\n")
37 def get_columns(fn, cols=[], delimiter=" ", comment="#"):
38 """ ge the columns of a file:
39 cols - a list of columns to extract. E.g [0,3,5]
40 If empty, all the columns are extracted
41 lines starting with the comment character are ignored """
42 columns = [[]
for i
in cols]
45 open(fn,
"r"), delimiter=delimiter, skipinitialspace=True)
47 if(row != []
and row[0][0] != comment):
49 for i
in range(0, len(row)):
50 columns[i].append(row[i])
52 for i
in range(0, len(cols)):
53 columns[i].append(row[cols[i]])
58 """ Argmin function: Returns the pair (min_value,min_index),
59 where min_index is the index of the minimum value
61 min_value = sequence[0]
63 for i
in range(0, len(sequence)):
64 if(sequence[i] < min_value):
65 min_value = sequence[i]
67 return min_value, min_index
73 print(
"Reading models ...")
77 fn_models = IMP.em2d.read_selection_file(fn_selection)
78 n_models = len(fn_models)
85 coords.append([x.get_coordinates()
for x
in xyz])
87 print(
"Computing matrix of RMSD ...")
88 rmsds = [[0.0
for i
in range(0, n_models)]
for n
in range(0, n_models)]
89 transformations = [[[]
for i
in range(0, n_models)]
90 for j
in range(0, n_models)]
92 for i
in range(0, n_models):
93 for j
in range(i + 1, n_models):
98 transformations[i][j] = t
99 transformations[j][i] = t.get_inverse()
100 temp = [t.get_transformed(v)
for v
in coords[i]]
106 print(
"Clustering (Complete linkage method)...")
107 cluster_set = IMP.em2d.do_hierarchical_clustering_complete_linkage(rmsds)
108 mat2 = cluster_set.get_linkage_matrix()
109 print(
"Complete Linkage Matrix")
114 em2d_scores = get_columns(fn_em2d_scores, [1])
115 em2d_scores = em2d_scores[0]
119 print(
"clusters below cutoff", rmsd_cutoff,
"Angstroms")
120 clusters = cluster_set.get_clusters_below_cutoff(rmsd_cutoff)
122 elems = cluster_set.get_cluster_elements(c)
125 scores_elements.append(em2d_scores[cid])
126 print(
"Cluster", c,
":", elems, scores_elements, end=
' ')
128 min_value, min_index = argmin(scores_elements)
129 min_elem_id = elems[min_index]
131 print(
"representative element", min_elem_id, min_value)
133 pdb_name =
"cluster-%03d-elem-%03d.pdb" % (c, i)
135 if(i != min_elem_id):
136 print(
"Writing element", i,
"aligned to ", min_elem_id,
":",
141 T.apply_index(model, p.get_particle_index())
143 print(
"Writing representative element", min_elem_id,
":", pdb_name)