This example clusters pdb models of an structure, chosen from a selection file. It is assumed that all the pdb files belong to the same structure and that the order of the atoms in the pdb files is the same in all files.
After the clustering procedure, a linkage matrix is generated.
20 Clustering of pdb models.
21 This script clusters pdb models of an structure, chosen from a
23 - It is assumed that all the pdb files belong to the same structure
24 and that the order of the atoms in the pdb files is the same in all files
25 - After the clustering procedure, a linkage matrix is generated.
30 if sys.platform ==
'win32':
31 sys.stderr.write(
"this example does not currently work on Windows\n")
35 def get_columns(fn, cols=[], delimiter=" ", comment="#"):
36 """ ge the columns of a file:
37 cols - a list of columns to extract. E.g [0,3,5]
38 If empty, all the columns are extracted
39 lines starting with the comment character are ignored """
40 columns = [[]
for i
in cols]
43 open(fn,
"r"), delimiter=delimiter, skipinitialspace=True)
45 if(row != []
and row[0][0] != comment):
47 for i
in range(0, len(row)):
48 columns[i].append(row[i])
50 for i
in range(0, len(cols)):
51 columns[i].append(row[cols[i]])
56 """ Argmin function: Returns the pair (min_value,min_index),
57 where min_index is the index of the minimum value
59 min_value = sequence[0]
61 for i
in range(0, len(sequence)):
63 if(sequence[i] < min_value):
64 min_value = sequence[i]
67 return min_value, min_index
75 print "Reading models ..."
79 fn_models = IMP.em2d.read_selection_file(fn_selection)
80 n_models = len(fn_models)
87 coords.append([x.get_coordinates()
for x
in xyz])
89 print "Computing matrix of RMSD ..."
90 rmsds = [[0.0
for i
in range(0, n_models)]
for n
in range(0, n_models)]
91 transformations = [[[]
for i
in range(0, n_models)]
92 for j
in range(0, n_models)]
94 for i
in xrange(0, n_models):
95 for j
in xrange(i + 1, n_models):
100 transformations[i][j] = t
101 transformations[j][i] = t.get_inverse()
102 temp = [t.get_transformed(v)
for v
in coords[i]]
108 print "Clustering (Complete linkage method)..."
109 cluster_set = IMP.em2d.do_hierarchical_clustering_complete_linkage(rmsds)
110 mat2 = cluster_set.get_linkage_matrix()
111 print "Complete Linkage Matrix"
116 em2d_scores = get_columns(fn_em2d_scores, [1])
117 em2d_scores = em2d_scores[0]
121 print "clusters below cutoff", rmsd_cutoff,
"Angstroms"
122 clusters = cluster_set.get_clusters_below_cutoff(rmsd_cutoff)
124 elems = cluster_set.get_cluster_elements(c)
127 scores_elements.append(em2d_scores[cid])
128 print "Cluster", c,
":", elems, scores_elements,
130 min_value, min_index = argmin(scores_elements)
131 min_elem_id = elems[min_index]
133 print "representative element", min_elem_id, min_value
135 pdb_name =
"cluster-%03d-elem-%03d.pdb" % (c, i)
137 if(i != min_elem_id):
138 print "Writing element", i,
"aligned to ", min_elem_id,
":", pdb_name
144 print "Writing representative element", min_elem_id,
":", pdb_name