19 Clustering of pdb models.
20 This script clusters pdb models of an structure, chosen from a
22 - It is assumed that all the pdb files belong to the same structure
23 and that the order of the atoms in the pdb files is the same in all files
24 - After the clustering procedure, a linkage matrix is generated.
29 if sys.platform ==
'win32':
30 sys.stderr.write(
"this example does not currently work on Windows\n")
34 def get_columns(fn, cols=[], delimiter=" ", comment="#"):
35 """ ge the columns of a file:
36 cols - a list of columns to extract. E.g [0,3,5]
37 If empty, all the columns are extracted
38 lines starting with the comment character are ignored """
39 columns = [[]
for i
in cols]
42 open(fn,
"r"), delimiter=delimiter, skipinitialspace=True)
44 if(row != []
and row[0][0] != comment):
46 for i
in range(0, len(row)):
47 columns[i].append(row[i])
49 for i
in range(0, len(cols)):
50 columns[i].append(row[cols[i]])
55 """ Argmin function: Returns the pair (min_value,min_index),
56 where min_index is the index of the minimum value
58 min_value = sequence[0]
60 for i
in range(0, len(sequence)):
62 if(sequence[i] < min_value):
63 min_value = sequence[i]
66 return min_value, min_index
71 fn_selection = em2d.get_example_path(
"all-models-1z5s.sel")
72 fn_em2d_scores = em2d.get_example_path(
"em2d_scores_for_clustering.data")
74 print "Reading models ..."
76 ssel = atom.ATOMPDBSelector()
78 fn_models = em2d.read_selection_file(fn_selection)
79 n_models = len(fn_models)
82 fn_model = em2d.get_example_path(fn)
83 h = atom.read_pdb(fn_model, model, ssel,
True)
85 xyz = core.XYZs(atom.get_leaves(h))
86 coords.append([x.get_coordinates()
for x
in xyz])
88 print "Computing matrix of RMSD ..."
89 rmsds = [[0.0
for i
in range(0, n_models)]
for n
in range(0, n_models)]
90 transformations = [[[]
for i
in range(0, n_models)]
91 for j
in range(0, n_models)]
93 for i
in xrange(0, n_models):
94 for j
in xrange(i + 1, n_models):
99 transformations[i][j] = t
100 transformations[j][i] = t.get_inverse()
101 temp = [t.get_transformed(v)
for v
in coords[i]]
107 print "Clustering (Complete linkage method)..."
108 cluster_set = em2d.do_hierarchical_clustering_complete_linkage(rmsds)
109 mat2 = cluster_set.get_linkage_matrix()
110 print "Complete Linkage Matrix"
115 em2d_scores = get_columns(fn_em2d_scores, [1])
116 em2d_scores = em2d_scores[0]
120 print "clusters below cutoff", rmsd_cutoff,
"Angstroms"
121 clusters = cluster_set.get_clusters_below_cutoff(rmsd_cutoff)
123 elems = cluster_set.get_cluster_elements(c)
126 scores_elements.append(em2d_scores[cid])
127 print "Cluster", c,
":", elems, scores_elements,
129 min_value, min_index = argmin(scores_elements)
130 min_elem_id = elems[min_index]
132 print "representative element", min_elem_id, min_value
134 pdb_name =
"cluster-%03d-elem-%03d.pdb" % (c, i)
136 if(i != min_elem_id):
137 print "Writing element", i,
"aligned to ", min_elem_id,
":", pdb_name
138 T = core.Transform(transformations[i][min_elem_id])
139 ps = atom.get_leaves(hierarchies[i])
143 print "Writing representative element", min_elem_id,
":", pdb_name
144 atom.write_pdb(hierarchies[i], pdb_name)
See IMP.em2d for more information.
double get_rmsd(const Selection &s0, const Selection &s1, const IMP::algebra::Transformation3D &tr_for_second=IMP::algebra::get_identity_transformation_3d())
See IMP.core for more information.
Transformation3D get_transformation_aligning_first_to_second(Vector3Ds a, Vector3Ds b)
See IMP.atom for more information.
Class for storing model, its restraints, constraints, and particles.