This example clusters pdb models of an structure, chosen from a selection file. It is assumed that all the pdb files belong to the same structure and that the order of the atoms in the pdb files is the same in all files.
After the clustering procedure, a linkage matrix is generated.
19 Clustering of pdb models.
20 This script clusters pdb models of an structure, chosen from a
22 - It is assumed that all the pdb files belong to the same structure
23 and that the order of the atoms in the pdb files is the same in all files
24 - After the clustering procedure, a linkage matrix is generated.
29 if sys.platform ==
'win32':
30 sys.stderr.write(
"this example does not currently work on Windows\n")
34 def get_columns(fn, cols=[], delimiter=" ", comment="#"):
35 """ ge the columns of a file:
36 cols - a list of columns to extract. E.g [0,3,5]
37 If empty, all the columns are extracted
38 lines starting with the comment character are ignored """
39 columns = [[]
for i
in cols]
42 open(fn,
"r"), delimiter=delimiter, skipinitialspace=True)
44 if(row != []
and row[0][0] != comment):
46 for i
in range(0, len(row)):
47 columns[i].append(row[i])
49 for i
in range(0, len(cols)):
50 columns[i].append(row[cols[i]])
55 """ Argmin function: Returns the pair (min_value,min_index),
56 where min_index is the index of the minimum value
58 min_value = sequence[0]
60 for i
in range(0, len(sequence)):
62 if(sequence[i] < min_value):
63 min_value = sequence[i]
66 return min_value, min_index
71 fn_selection = em2d.get_example_path(
"all-models-1z5s.sel")
72 fn_em2d_scores = em2d.get_example_path(
"em2d_scores_for_clustering.data")
74 print "Reading models ..."
76 ssel = atom.ATOMPDBSelector()
78 fn_models = em2d.read_selection_file(fn_selection)
79 n_models = len(fn_models)
82 fn_model = em2d.get_example_path(fn)
83 h = atom.read_pdb(fn_model, model, ssel,
True)
85 xyz = core.XYZs(atom.get_leaves(h))
86 coords.append([x.get_coordinates()
for x
in xyz])
88 print "Computing matrix of RMSD ..."
89 rmsds = [[0.0
for i
in range(0, n_models)]
for n
in range(0, n_models)]
90 transformations = [[[]
for i
in range(0, n_models)]
91 for j
in range(0, n_models)]
93 for i
in xrange(0, n_models):
94 for j
in xrange(i + 1, n_models):
99 transformations[i][j] = t
100 transformations[j][i] = t.get_inverse()
101 temp = [t.get_transformed(v)
for v
in coords[i]]
107 print "Clustering (Complete linkage method)..."
108 cluster_set = em2d.do_hierarchical_clustering_complete_linkage(rmsds)
109 mat2 = cluster_set.get_linkage_matrix()
110 print "Complete Linkage Matrix"
115 em2d_scores = get_columns(fn_em2d_scores, [1])
116 em2d_scores = em2d_scores[0]
120 print "clusters below cutoff", rmsd_cutoff,
"Angstroms"
121 clusters = cluster_set.get_clusters_below_cutoff(rmsd_cutoff)
123 elems = cluster_set.get_cluster_elements(c)
126 scores_elements.append(em2d_scores[cid])
127 print "Cluster", c,
":", elems, scores_elements,
129 min_value, min_index = argmin(scores_elements)
130 min_elem_id = elems[min_index]
132 print "representative element", min_elem_id, min_value
134 pdb_name =
"cluster-%03d-elem-%03d.pdb" % (c, i)
136 if(i != min_elem_id):
137 print "Writing element", i,
"aligned to ", min_elem_id,
":", pdb_name
138 T = core.Transform(transformations[i][min_elem_id])
139 ps = atom.get_leaves(hierarchies[i])
143 print "Writing representative element", min_elem_id,
":", pdb_name
144 atom.write_pdb(hierarchies[i], pdb_name)