IMP logo
IMP Reference Guide  develop.e6c37e56ed,2022/01/25
The Integrative Modeling Platform
comparisons.py
1 """@namespace IMP.EMageFit.imp_general.comparisons
2  Utility functions for comparisons.
3 """
4 
5 
6 import IMP
7 import IMP.algebra
8 import IMP.core
9 import IMP.em
10 import IMP.atom
11 import IMP.EMageFit.imp_general.representation as representation
12 
13 import random
14 import itertools
15 import math
16 import csv
17 import itertools
18 import logging
19 log = logging.getLogger("comparisons")
20 
21 
22 def get_coordinates(hierarchy):
23  xyz = [IMP.core.XYZ(l) for l in IMP.atom.get_leaves(hierarchy)]
24  coords = [x.get_coordinates() for x in xyz]
25  return coords
26 
27 
28 def get_assembly_placement_score(assembly, native_assembly, align=False):
29  """
30  Computes the placement score of an assembly respect to the native_assembly.
31  @param assembly An IMP.atom.Molecule object
32  @param native_assembly An IMP.atom.Molecule object
33  @param align If True, assembly is aligned to native_assembly before
34  calculating the placement score
35  """
36 
37  distances, angles = get_components_placement_scores(assembly,
38  native_assembly, align)
39  n = 1. * len(distances)
40  return sum(distances) / n, sum(angles) / n
41 
42 
43 def get_components_placement_scores(assembly, native_assembly, align=False):
44  """
45  Compute the placement score of each of the children of an assembly.
46  The function does not do any time of alignment of the coordinates
47  @param assembly An IMP.atom.Molecule object
48  @param native_assembly An IMP.atom.Molecule object with the native conformation
49  Obviously the atoms in assembly and native assembly
50  must be the same
51  @param align If True, assembly is aligned to native_assembly before
52  calculating the placement score
53  @return The function returns 2 lists. The first list contains the
54  placement distances of the children. The second list contains the
55  placement angles
56  """
57  model_coords_per_child = [get_coordinates(c)
58  for c in assembly.get_children()]
59  native_coords_per_child = [get_coordinates(c)
60  for c in native_assembly.get_children()]
61  if align:
62  model_coords = []
63  nil = [model_coords.extend(x) for x in model_coords_per_child]
64  native_coords = []
65  nil = [native_coords.extend(x) for x in native_coords_per_child]
67  model_coords, native_coords)
68  # get aligned coordinates
69  new_model_coords_per_child = []
70  for c in model_coords_per_child:
71  coords = [T.get_transformed(x) for x in c]
72  new_model_coords_per_child.append(coords)
73  model_coords_per_child = new_model_coords_per_child
74  distances, angles = get_placement_scores_from_coordinates(
75  native_coords_per_child, model_coords_per_child)
76  return distances, angles
77 
78 
79 def get_placement_scores_from_coordinates(model_components_coords,
80  native_components_coords):
81  """
82  Computes the placement score for each of the components
83  @param model_components_coords A list with the coordinates for each
84  component
85  @param native_components_coords A list with the coordinates for each
86  component in the native assembly
87  """
88  distances = []
89  angles = []
90  for model_coords, native_coords in zip(
91  model_components_coords, native_components_coords):
92  distance, angle = get_placement_score_from_coordinates(model_coords,
93  native_coords)
94  distances.append(distance)
95  angles.append(angle)
96  return distances, angles
97 
98 
99 def get_placement_score_from_coordinates(model_coords, native_coords):
100  """
101  Computes the position error (placement distance) and the orientation
102  error (placement angle) of the coordinates in model_coords respect to
103  the coordinates in native_coords.
104  placement distance - translation between the centroids of the
105  coordinates
106  placement angle - Angle in the axis-angle formulation of the rotation
107  aligning the two rigid bodies.
108  """
109  native_centroid = IMP.algebra.get_centroid(native_coords)
110  model_centroid = IMP.algebra.get_centroid(model_coords)
111  translation_vector = native_centroid - model_centroid
112  distance = translation_vector.get_magnitude()
113  if(len(model_coords) != len(native_coords)):
114  raise ValueError(
115  "Mismatch in the number of members %d %d " % (
116  len(model_coords),
117  len(native_coords)))
119  native_coords)
120  P = IMP.algebra.get_axis_and_angle(TT.get_rotation())
121  angle = P.second
122  return distance, angle
123 
124 
125 def get_rmsd(hierarchy1, hierarchy2):
126  xyz1 = [IMP.core.XYZ(l) for l in IMP.atom.get_leaves(hierarchy1)]
127  xyz2 = [IMP.core.XYZ(l) for l in IMP.atom.get_leaves(hierarchy2)]
128  return IMP.atom.get_rmsd(xyz1, xyz2)
129 
130 
131 def get_ccc(native_assembly, assembly, resolution, voxel_size,
132  threshold, write_maps=False):
133  """
134  Threshold - threshold used for the map of the native assembly. Pixels
135  with values above this threshold in the native map are used for the
136  calculation of the cross_correlation_coefficient
137  """
138  import IMP.em as em
139  particles_native = IMP.atom.get_leaves(native_assembly)
140  particles_solution = IMP.atom.get_leaves(assembly)
141  bb_native = IMP.core.get_bounding_box(IMP.core.XYZs(particles_native))
142  bb_solution = IMP.core.get_bounding_box(IMP.core.XYZs(particles_solution))
143  # bounding box enclosing both the particles of the native assembly
144  # and the particles of the model
145  bb_union = IMP.algebra.get_union(bb_native, bb_solution)
146  # add border of 4 voxels
147  border = 4 * voxel_size
148  bottom = bb_union.get_corner(0)
149  bottom += IMP.algebra.Vector3D(-border, -border, -border)
150  top = bb_union.get_corner(1)
151  top += IMP.algebra.Vector3D(border, border, border)
152  bb_union = IMP.algebra.BoundingBox3D(bottom, top)
153 
154  mrw = IMP.em.MRCReaderWriter()
155  header = IMP.em.create_density_header(bb_union, voxel_size)
156  header.set_resolution(resolution)
157 
158  map_native = IMP.em.SampledDensityMap(header)
159  map_native.set_particles(particles_native)
160  map_native.resample()
161 
162  map_solution = IMP.em.SampledDensityMap(header)
163  map_solution.set_particles(particles_solution)
164  map_solution.resample()
165 
166  if(write_maps):
167  IMP.em.write_map(map_solution, "map_solution.mrc", mrw)
168  IMP.em.write_map(map_native, "map_native.mrc", mrw)
169  map_native.calcRMS()
170  map_solution.calcRMS()
171  # base the calculation of the cross_correlation coefficient on the threshold]
172  # for the native map, because the threshold for the map of the model changes
173  # with each model
174  threshold = 0.25 # threshold AFTER normalization using calcRMS()
175  ccc = IMP.em.get_coarse_cc_coefficient(map_solution,
176  map_native, threshold)
177  log.debug("cross_correlation_coefficient (based on native_map "
178  "threshold %s) %s", threshold, ccc)
179  return ccc
180 
181 
182 def get_drms_for_backbone(assembly, native_assembly):
183  """
184  Measure the DRMS ob the backbone between two assemblies.
185  @param assembly The DRMS is computed for this assembly
186  @param native_assembly The assembly that acts as a reference
187 
188  Notes:
189  1) The components of the assembly can be proteins or nucleic acids
190  2) If a protein, the c-alphas are used for calculating the drms
191  3) If a nucleic acid, the backbone of C4' atoms is used
192  4) The chains are treated as rigid bodies to speed the calculation.
193 
194  WARNING: if the function fails with a segmentation fault, one of the
195  possible problems is that IMP reads some HETATM as calphas. Check that
196  the chain does not have heteroatoms.
197  """
198  log.debug("Measuring DRMS of the backbone")
199  begin_range = 0
200  ranges = []
201  backbone = []
202  h_chains = IMP.atom.get_by_type(assembly, IMP.atom.CHAIN_TYPE)
203  for h in h_chains:
204  atoms = representation.get_backbone(h)
205  """"
206  for a in atoms:
207  print "atom ===> ",
208  at = IMP.atom.Atom(a)
209  hr = at.get_parent()
210  res = IMP.atom.Residue(hr)
211  ch = IMP.atom.Chain(h)
212  ch.show()
213  print " - ",
214  res.show()
215  print " - ",
216  at.show()
217  print ""
218  """
219  backbone.extend(atoms)
220  end_range = begin_range + len(atoms)
221  ranges.append((begin_range, end_range))
222  begin_range = end_range
223  log.debug("Ranges %s number of atoms %s", ranges, len(backbone))
224  xyzs = [IMP.core.XYZ(l) for l in backbone]
225  native_chains = IMP.atom.get_by_type(native_assembly, IMP.atom.CHAIN_TYPE)
226  names = [IMP.atom.Chain(ch).get_id() for ch in native_chains]
227  native_backbone = []
228  for h in native_chains:
229  native_backbone.extend(representation.get_backbone(h))
230  native_xyzs = [IMP.core.XYZ(l) for l in native_backbone]
231  if len(xyzs) != len(native_xyzs):
232  raise ValueError(
233  "Cannot compute DRMS for sets of atoms of different size")
234  log.debug("Getting rigid bodies rmsd")
235  drms = IMP.atom.get_rigid_bodies_drms(xyzs, native_xyzs, ranges)
236  if drms < 0 or math.isnan(drms): # or drms > 100:
237  log.debug(
238  "len(xyzs) = %s. len(native_xyzs) = %s",
239  len(xyzs),
240  len(native_xyzs))
241  log.debug("drms = %s", drms)
242  IMP.atom.write_pdb(assembly, "drms_model_calphas.pdb")
243  IMP.atom.write_pdb(native_assembly, "drms_native_calphas.pdb")
244  raise ValueError("There is a problem with the drms. I wrote the pdbs "
245  "for you: drms_model_calphas.pdb drms_native_calphas.pdb")
246  return drms
double get_rigid_bodies_drms(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2, const IMP::IntRanges &ranges)
DRMS between two sets of rigid bodies.
def get_assembly_placement_score
Computes the placement score of an assembly respect to the native_assembly.
Definition: comparisons.py:28
def get_placement_scores_from_coordinates
Computes the placement score for each of the components.
Definition: comparisons.py:79
double get_coarse_cc_coefficient(const DensityMap *grid1, const DensityMap *grid2, double grid2_voxel_data_threshold, bool allow_padding=false, FloatPair norm_factors=FloatPair(0., 0.))
Calculates the cross correlation coefficient between two maps.
Utility functions to handle representation.
BoundingBoxD< D > get_union(BoundingBoxD< D > a, const BoundingBoxD< D > &b)
Return the union bounding box.
Definition: BoundingBoxD.h:254
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
def get_drms_for_backbone
Measure the DRMS ob the backbone between two assemblies.
Definition: comparisons.py:182
Class for sampling a density map from particles.
double get_rmsd(const Selection &s0, const Selection &s1)
DensityHeader create_density_header(const algebra::BoundingBoxD< 3 > &bb, float spacing)
Create a header from a bounding box in 3D.
algebra::BoundingBoxD< 3 > get_bounding_box(const XYZRs &ps)
Get the bounding box.
double get_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2)
def get_ccc
Threshold - threshold used for the map of the native assembly.
Definition: comparisons.py:131
std::pair< Vector3D, double > get_axis_and_angle(const Rotation3D &rot)
Decompose a Rotation3D object into a rotation around an axis.
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def get_placement_score_from_coordinates
Computes the position error (placement distance) and the orientation error (placement angle) of the c...
Definition: comparisons.py:99
Vector3D get_centroid(const Vector3Ds &ps)
Return the centroid of a set of vectors.
Definition: Vector3D.h:68
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
VectorD< 3 > Vector3D
Definition: VectorD.h:421
Store info for a chain of a protein.
Definition: Chain.h:61
def get_components_placement_scores
Compute the placement score of each of the children of an assembly.
Definition: comparisons.py:43
Transformation3D get_transformation_aligning_first_to_second(Vector3Ds a, Vector3Ds b)
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)