doc/ref/clustering__of__pdb__models_8py_source.html

 ## \example em2d/clustering_of_pdb_models.py

 # This example clusters pdb models of a structure, chosen from a

 # selection file.

 #

 # It is assumed that all the pdb files belong to the same structure

 # and that the order of the atoms in the pdb files is the same in all files.

 #

 # After the clustering procedure, a linkage matrix is generated.

 #


 from __future__ import print_function

 import IMP

 import IMP.algebra

 import IMP.core

 import IMP.atom

 import IMP.em2d

 import sys

 import csv


 # Hand the command line and a short description of this example to IMP.
 IMP.setup_from_argv(sys.argv, "clustering of PDB models")

 # The bare string below is not in docstring position; it is a no-op
 # expression kept purely as a human-readable description of the script.
 """

     Clustering of pdb models.

     This script clusters pdb models of an structure, chosen from a

     selection file.

     - It is assumed that all the pdb files belong to the same structure

     and that the order of the atoms in the pdb files is the same in all files

     - After the clustering procedure, a linkage matrix is generated.


 """

 # On Windows, report on stderr that the example is unsupported and stop
 # with exit status 0 before any further work is attempted.
 if sys.platform == 'win32':
     sys.stderr.write("this example does not currently work on Windows\n")
     raise SystemExit(0)


 def get_columns(fn, cols=None, delimiter=" ", comment="#"):
     """ Get the columns of a delimited text file.

         fn - path of the file to read
         cols - a list of column indices to extract, e.g. [0, 3, 5].
                If empty (or None), all the columns are extracted.
         delimiter - character separating the fields of a row
         comment - rows whose first field starts with this string are
                   ignored

         Returns a list with one list of strings per extracted column, in
         the order given by cols (or in file order when extracting all
         columns). Blank rows are skipped. An IndexError is raised if a
         data row has fewer fields than a requested column index.
     """
     # None replaces the former mutable [] default; [] is still accepted.
     cols = list(cols) if cols else []
     columns = [[] for _ in cols]
     # The context manager guarantees the file is closed, even on error.
     with open(fn, "r") as handle:
         reader = csv.reader(
             handle, delimiter=delimiter, skipinitialspace=True)
         for row in reader:
             # Skip rows without content (no fields, or only empty fields)
             # instead of failing when looking at their first character.
             if not any(row):
                 continue
             if comment and row[0].startswith(comment):
                 continue
             if cols:
                 for column, index in zip(columns, cols):
                     column.append(row[index])
             else:
                 # The number of columns is only known once rows are seen,
                 # so the output grows to fit the widest row read so far.
                 while len(columns) < len(row):
                     columns.append([])
                 for column, value in zip(columns, row):
                     column.append(value)
     return columns


 def argmin(sequence):
     """ Locate the smallest item of an indexable sequence.

         Returns the pair (min_value, min_index). When the smallest value
         occurs more than once, the index of its first occurrence is
         reported. An empty sequence raises IndexError.
     """
     best_value, best_index = sequence[0], 0
     for position, candidate in enumerate(sequence):
         # Strict comparison: later items equal to the minimum do not win.
         if candidate < best_value:
             best_value, best_index = candidate, position
     return best_value, best_index


 # Example inputs: the selection file naming the models, and the file with
 # the em2d scores consulted later when picking cluster representatives.
 fn_selection = IMP.em2d.get_example_path("all-models-1z5s.sel")
 fn_em2d_scores = IMP.em2d.get_example_path("em2d_scores_for_clustering.data")

 # Read every model named in the selection file into one shared IMP.Model,
 # keeping both its hierarchy and the coordinates of its leaf particles.
 print("Reading models ...")
 model = IMP.Model()
 ssel = IMP.atom.ATOMPDBSelector()
 fn_models = IMP.em2d.read_selection_file(fn_selection)
 n_models = len(fn_models)
 hierarchies = []
 coords = []
 for fn in fn_models:
     h = IMP.atom.read_pdb(IMP.em2d.get_example_path(fn), model, ssel, True)
     hierarchies.append(h)
     leaves = IMP.atom.get_leaves(h)
     coords.append([p.get_coordinates() for p in IMP.core.XYZs(leaves)])


 print("Computing matrix of RMSD ...")
 # Square tables indexed [i][j]. The diagonal keeps its initial value:
 # 0.0 for the RMSD and an empty list for the transformation.
 rmsds = [[0.0] * n_models for _ in range(n_models)]
 transformations = [[[] for _ in range(n_models)] for _ in range(n_models)]
 # Each unordered pair (i, j) with i < j is aligned once. The [j][i]
 # entries are filled from the same result: the inverse transformation
 # and the same RMSD value.
 for i in range(n_models):
     for j in range(i + 1, n_models):
         t = IMP.algebra.get_transformation_aligning_first_to_second(
             coords[i], coords[j])
         transformations[i][j] = t
         transformations[j][i] = t.get_inverse()
         # RMSD is measured after moving model i with the transformation.
         moved = [t.get_transformed(v) for v in coords[i]]
         rmsds[i][j] = rmsds[j][i] = IMP.algebra.get_rmsd(moved, coords[j])


 # Build the complete-linkage clustering from the pairwise RMSD table and
 # print the resulting linkage matrix, one entry per line.
 print("Clustering (Complete linkage method)...")
 cluster_set = IMP.em2d.do_hierarchical_clustering_complete_linkage(rmsds)
 mat2 = cluster_set.get_linkage_matrix()
 print("Complete Linkage Matrix")
 for linkage_row in mat2:
     print(linkage_row)


 # Read the scores from the second column (index 1) of the scores file.
 # get_columns returns the fields as strings, so they are converted to
 # float here: the representative of each cluster is chosen as the
 # minimum score, and comparing the raw strings would order them
 # lexicographically (e.g. "10.0" < "9.0") rather than numerically.
 # NOTE(review): the list is later indexed by cluster element id, so the
 # file presumably has one row per model in selection-file order - confirm.
 em2d_scores = [float(s) for s in get_columns(fn_em2d_scores, [1])[0]]


 # Cut the hierarchy at an RMSD threshold and report each resulting cluster.
 rmsd_cutoff = 1.4
 print("clusters below cutoff", rmsd_cutoff, "Angstroms")
 clusters = cluster_set.get_clusters_below_cutoff(rmsd_cutoff)
 for c in clusters:
     elems = cluster_set.get_cluster_elements(c)
     scores_elements = [em2d_scores[cid] for cid in elems]
     print("Cluster", c, ":", elems, scores_elements, end=' ')
     # The member with the minimum em2d score represents the cluster.
     min_value, min_index = argmin(scores_elements)
     min_elem_id = elems[min_index]
     print("representative element", min_elem_id, min_value)
     # Every member is written to its own PDB file in the working
     # directory. Members other than the representative are first moved
     # with the stored transformation towards the representative.
     for i in elems:
         pdb_name = "cluster-%03d-elem-%03d.pdb" % (c, i)
         if i == min_elem_id:
             print("Writing representative element", min_elem_id, ":", pdb_name)
         else:
             print("Writing element", i, "aligned to ", min_elem_id, ":",
                   pdb_name)
             T = IMP.core.Transform(transformations[i][min_elem_id])
             # The transform is applied to each leaf particle of the model.
             for p in IMP.atom.get_leaves(hierarchies[i]):
                 T.apply_index(model, p.get_particle_index())
         IMP.atom.write_pdb(hierarchies[i], pdb_name)

IMP::em2d
Restraints using electron microscopy 2D images (class averages).

IMP::setup_from_argv
Strings setup_from_argv(const Strings &argv, std::string description, std::string positional_description, int num_positional)

IMP::em2d::get_example_path
std::string get_example_path(std::string file_name)
Return the full path to one of this module's example files.

IMP::atom::write_pdb
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)

IMP::atom::read_pdb
void read_pdb(TextInput input, int model, Hierarchy h)

IMP::Vector< XYZ >

IMP::Model
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86

IMP::atom::ATOMPDBSelector
Select all non-alternative ATOM records.
Definition: pdb.h:128

IMP::algebra::get_rmsd
double get_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2)
Definition: algebra/distance.h:47

IMP::core::Transform
Apply a transformation to a passed particle.
Definition: Transform.h:23

IMP::core
Basic functionality that is expected to be used by a wide variety of IMP users.

IMP::algebra
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...

IMP::algebra::get_transformation_aligning_first_to_second
Transformation3D get_transformation_aligning_first_to_second(Vector3Ds a, Vector3Ds b)

IMP::atom
Functionality for loading, creating, manipulating and scoring atomic structures.

IMP::atom::get_leaves
Hierarchies get_leaves(const Selection &h)