IMP logo
IMP Reference Guide  2.5.0
The Integrative Modeling Platform
comparisons.py
1 """@namespace IMP.EMageFit.imp_general.comparisons
2  Utility functions for comparisons.
3 """
4 
5 
6 import IMP
7 import IMP.algebra as alg
8 import IMP.core as core
9 import IMP.em as em
10 import IMP.atom as atom
11 import IMP.EMageFit.imp_general.alignments as alignments
12 import IMP.EMageFit.imp_general.representation as representation
13 
14 import random
15 import itertools
16 import math
17 import csv
18 import itertools
19 import logging
20 log = logging.getLogger("comparisons")
21 
22 
23 def get_coordinates(hierarchy):
24  xyz = [core.XYZ(l) for l in atom.get_leaves(hierarchy)]
25  coords = [x.get_coordinates() for x in xyz]
26  return coords
27 
28 
29 def get_assembly_placement_score(assembly, native_assembly, align=False):
30  """
31  Computes the placement score of an assembly respect to the native_assembly.
32  @param assembly An atom.Molecule object
33  @param native_assembly An atom.Molecule object
34  @param align If True, assembly is aligned to native_assembly before
35  calculating the placement score
36  """
37 
38  distances, angles = get_components_placement_scores(assembly,
39  native_assembly, align)
40  n = 1. * len(distances)
41  return sum(distances) / n, sum(angles) / n
42 
43 
44 def get_components_placement_scores(assembly, native_assembly, align=False):
45  """
46  Compute the placement score of each of the children of an assembly.
47  The function does not do any time of alignment of the coordinates
48  @param assembly An atom.Molecule object
49  @param native_assembly An atom.Molecule object with the native conformation
50  Obviously the atoms in assembly and native assembly
51  must be the same
52  @param align If True, assembly is aligned to native_assembly before
53  calculating the placement score
54  @return The function returns 2 lists. The first list contains the
55  placement distances of the children. The second list contains the
56  placnement angles
57  """
58  model_coords_per_child = [get_coordinates(c)
59  for c in assembly.get_children()]
60  native_coords_per_child = [get_coordinates(c)
61  for c in native_assembly.get_children()]
62  if align:
63  model_coords = []
64  nil = [model_coords.extend(x) for x in model_coords_per_child]
65  native_coords = []
66  nil = [native_coords.extend(x) for x in native_coords_per_child]
67  T = alg.get_transformation_aligning_first_to_second(model_coords,
68  native_coords)
69  # get aligned coordinates
70  new_model_coords_per_child = []
71  for c in model_coords_per_child:
72  coords = [T.get_transformed(x) for x in c]
73  new_model_coords_per_child.append(coords)
74  model_coords_per_child = new_model_coords_per_child
75  distances, angles = get_placement_scores_from_coordinates(
76  native_coords_per_child, model_coords_per_child)
77  return distances, angles
78 
79 
80 def get_placement_scores_from_coordinates(model_components_coords,
81  native_components_coords):
82  """
83  Computes the placement score for each of the components
84  @param model_components_coords A list with the coordinates for each
85  component
86  @param native_components_coords A list with the coordinates for each
87  component in the native assembly
88  """
89  distances = []
90  angles = []
91  for model_coords, native_coords in zip(
92  model_components_coords, native_components_coords):
93  distance, angle = get_placement_score_from_coordinates(model_coords,
94  native_coords)
95  distances.append(distance)
96  angles.append(angle)
97  return distances, angles
98 
99 
100 def get_placement_score_from_coordinates(model_coords, native_coords):
101  """
102  Computes the position error (placement distance) and the orientation
103  error (placement angle) of the coordinates in model_coords respect to
104  the coordinates in native_coords.
105  placement distance - translation between the centroids of the
106  coordinates
107  placement angle - Angle in the axis-angle formulation of the rotation
108  aligning the two rigid bodies.
109  """
110  native_centroid = alg.get_centroid(native_coords)
111  model_centroid = alg.get_centroid(model_coords)
112  translation_vector = native_centroid - model_centroid
113  distance = translation_vector.get_magnitude()
114  if(len(model_coords) != len(native_coords)):
115  raise ValueError(
116  "Mismatch in the number of members %d %d " % (
117  len(model_coords),
118  len(native_coords)))
119  TT = alg.get_transformation_aligning_first_to_second(model_coords,
120  native_coords)
121  P = alg.get_axis_and_angle(TT.get_rotation())
122  angle = P.second
123  return distance, angle
124 
125 
126 def get_rmsd(hierarchy1, hierarchy2):
127  xyz1 = [core.XYZ(l) for l in atom.get_leaves(hierarchy1)]
128  xyz2 = [core.XYZ(l) for l in atom.get_leaves(hierarchy2)]
129  return atom.get_rmsd(xyz1, xyz2)
130 
131 
132 def get_ccc(native_assembly, assembly, resolution, voxel_size,
133  threshold, write_maps=False):
134  """
135  Threshold - threshold used for the map of the native assembly. Pixels
136  with values above this threshold in the native map are used for the
137  calculation of the cross_correlation_coefficient
138  """
139  import IMP.em as em
140  particles_native = atom.get_leaves(native_assembly)
141  particles_solution = atom.get_leaves(assembly)
142  bb_native = core.get_bounding_box(core.XYZs(particles_native))
143  bb_solution = core.get_bounding_box(core.XYZs(particles_solution))
144  # bounding box enclosing both the particles of the native assembly
145  # and the particles of the model
146  bb_union = alg.get_union(bb_native, bb_solution)
147  # add border of 4 voxels
148  border = 4 * voxel_size
149  bottom = bb_union.get_corner(0)
150  bottom += alg.Vector3D(-border, -border, -border)
151  top = bb_union.get_corner(1)
152  top += alg.Vector3D(border, border, border)
153  bb_union = alg.BoundingBox3D(bottom, top)
154 
155  mrw = em.MRCReaderWriter()
156  header = em.create_density_header(bb_union, voxel_size)
157  header.set_resolution(resolution)
158 
159  map_native = em.SampledDensityMap(header)
160  map_native.set_particles(particles_native)
161  map_native.resample()
162 
163  map_solution = em.SampledDensityMap(header)
164  map_solution.set_particles(particles_solution)
165  map_solution.resample()
166 
167  if(write_maps):
168  em.write_map(map_solution, "map_solution.mrc", mrw)
169  em.write_map(map_native, "map_native.mrc", mrw)
170  map_native.calcRMS()
171  map_solution.calcRMS()
172  coarse_cc = em.CoarseCC()
173  # base the calculation of the cross_correlation coefficient on the threshold]
174  # for the native map, because the threshold for the map of the model changes
175  # with each model
176  threshold = 0.25 # threshold AFTER normalization using calcRMS()
177  ccc = coarse_cc.cross_correlation_coefficient(map_solution,
178  map_native, threshold)
179  log.debug("cross_correlation_coefficient (based on native_map "
180  "treshold %s) %s", threshold, ccc)
181  return ccc
182 
183 
184 def get_drms_for_backbone(assembly, native_assembly):
185  """
186  Measure the DRMS ob the backbone between two assemblies.
187  @param assembly The DRMS is computed for this assembly
188  @param native_assembly The assembly that acts as a reference
189 
190  Notes:
191  1) The components of the assembly can be proteins or nucleic acids
192  2) If a protein, the c-alphas are used for calculating the drms
193  3) If a nucleic acid, the backbone of C4' atoms is used
194  4) The chains are treated as rigid bodies to speed the calculation.
195 
196  WARNING: if the function fails with a segmentation fault, one of the
197  possible problems is that IMP reads some HETATM as calphas. Check that
198  the chain does not have heteroatoms.
199  """
200  log.debug("Measuring DRMS of the backbone")
201  begin_range = 0
202  ranges = []
203  backbone = []
204  h_chains = atom.get_by_type(assembly, atom.CHAIN_TYPE)
205  for h in h_chains:
206  atoms = representation.get_backbone(h)
207  """"
208  for a in atoms:
209  print "atom ===> ",
210  at = atom.Atom(a)
211  hr = at.get_parent()
212  res = atom.Residue(hr)
213  ch = atom.Chain(h)
214  ch.show()
215  print " - ",
216  res.show()
217  print " - ",
218  at.show()
219  print ""
220  """
221  backbone.extend(atoms)
222  end_range = begin_range + len(atoms)
223  ranges.append((begin_range, end_range))
224  begin_range = end_range
225  log.debug("Ranges %s number of atoms %s", ranges, len(backbone))
226  xyzs = [core.XYZ(l) for l in backbone]
227  native_chains = atom.get_by_type(native_assembly, atom.CHAIN_TYPE)
228  names = [atom.Chain(ch).get_id() for ch in native_chains]
229  native_backbone = []
230  for h in native_chains:
231  native_backbone.extend(representation.get_backbone(h))
232  native_xyzs = [core.XYZ(l) for l in native_backbone]
233  if len(xyzs) != len(native_xyzs):
234  raise ValueError(
235  "Cannot compute DRMS for sets of atoms of different size")
236  log.debug("Getting rigid bodies rmsd")
237  drms = atom.get_rigid_bodies_drms(xyzs, native_xyzs, ranges)
238  if drms < 0 or math.isnan(drms): # or drms > 100:
239  log.debug(
240  "len(xyzs) = %s. len(native_xyzs) = %s",
241  len(xyzs),
242  len(native_xyzs))
243  log.debug("drms = %s", drms)
244  atom.write_pdb(assembly, "drms_model_calphas.pdb")
245  atom.write_pdb(native_assembly, "drms_native_calphas.pdb")
246  raise ValueError("There is a problem with the drms. I wrote the pdbs "
247  "for you: drms_model_calphas.pdb drms_native_calphas.pdb")
248  return drms
def get_assembly_placement_score
Computes the placement score of an assembly respect to the native_assembly.
Definition: comparisons.py:29
def get_placement_scores_from_coordinates
Computes the placement score for each of the components.
Definition: comparisons.py:80
Utility functions to handle representation.
Utility functions to handle alignments.
Definition: alignments.py:1
def get_drms_for_backbone
Measure the DRMS ob the backbone between two assemblies.
Definition: comparisons.py:184
Responsible for performing coarse fitting between two density objects.
Definition: CoarseCC.h:28
Class for sampling a density map from particles.
double get_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2)
def get_ccc
Threshold - threshold used for the map of the native assembly.
Definition: comparisons.py:132
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def get_placement_score_from_coordinates
Computes the position error (placement distance) and the orientation error (placement angle) of the c...
Definition: comparisons.py:100
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Store info for a chain of a protein.
Definition: Chain.h:21
def get_components_placement_scores
Compute the placement score of each of the children of an assembly.
Definition: comparisons.py:44
Functionality for loading, creating, manipulating and scoring atomic structures.