11 from __future__
import print_function
24 Clustering of pdb models.
25 This script clusters pdb models of an structure, chosen from a
27 - It is assumed that all the pdb files belong to the same structure
28 and that the order of the atoms in the pdb files is the same in all files
29 - After the clustering procedure, a linkage matrix is generated.
34 if sys.platform ==
'win32':
35 sys.stderr.write(
"this example does not currently work on Windows\n")
39 def get_columns(fn, cols=[], delimiter=" ", comment="#"):
40 """ ge the columns of a file:
41 cols - a list of columns to extract. E.g [0,3,5]
42 If empty, all the columns are extracted
43 lines starting with the comment character are ignored """
44 columns = [[]
for i
in cols]
47 open(fn,
"r"), delimiter=delimiter, skipinitialspace=True)
49 if(row != []
and row[0][0] != comment):
51 for i
in range(0, len(row)):
52 columns[i].append(row[i])
54 for i
in range(0, len(cols)):
55 columns[i].append(row[cols[i]])
60 """ Argmin function: Returns the pair (min_value,min_index),
61 where min_index is the index of the minimum value
63 min_value = sequence[0]
65 for i
in range(0, len(sequence)):
67 if(sequence[i] < min_value):
68 min_value = sequence[i]
71 return min_value, min_index
79 print(
"Reading models ...")
83 fn_models = IMP.em2d.read_selection_file(fn_selection)
84 n_models = len(fn_models)
91 coords.append([x.get_coordinates()
for x
in xyz])
93 print(
"Computing matrix of RMSD ...")
94 rmsds = [[0.0
for i
in range(0, n_models)]
for n
in range(0, n_models)]
95 transformations = [[[]
for i
in range(0, n_models)]
96 for j
in range(0, n_models)]
98 for i
in range(0, n_models):
99 for j
in range(i + 1, n_models):
104 transformations[i][j] = t
105 transformations[j][i] = t.get_inverse()
106 temp = [t.get_transformed(v)
for v
in coords[i]]
112 print(
"Clustering (Complete linkage method)...")
113 cluster_set = IMP.em2d.do_hierarchical_clustering_complete_linkage(rmsds)
114 mat2 = cluster_set.get_linkage_matrix()
115 print(
"Complete Linkage Matrix")
120 em2d_scores = get_columns(fn_em2d_scores, [1])
121 em2d_scores = em2d_scores[0]
125 print(
"clusters below cutoff", rmsd_cutoff,
"Angstroms")
126 clusters = cluster_set.get_clusters_below_cutoff(rmsd_cutoff)
128 elems = cluster_set.get_cluster_elements(c)
131 scores_elements.append(em2d_scores[cid])
132 print(
"Cluster", c,
":", elems, scores_elements, end=
' ')
134 min_value, min_index = argmin(scores_elements)
135 min_elem_id = elems[min_index]
137 print(
"representative element", min_elem_id, min_value)
139 pdb_name =
"cluster-%03d-elem-%03d.pdb" % (c, i)
141 if(i != min_elem_id):
142 print(
"Writing element", i,
"aligned to ", min_elem_id,
":", pdb_name)
146 T.apply_index(model, p.get_particle_index())
148 print(
"Writing representative element", min_elem_id,
":", pdb_name)
Restraints using electron microscopy 2D images (class averages).
Strings setup_from_argv(const Strings &argv, std::string description, std::string positional_description, int num_positional)
std::string get_example_path(std::string file_name)
Return the full path to one of this module's example files.
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Select all non-alternative ATOM records.
double get_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2)
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Transformation3D get_transformation_aligning_first_to_second(Vector3Ds a, Vector3Ds b)
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)