IMP logo
IMP Reference Guide  2.21.0
The Integrative Modeling Platform
reference.py
1 #!/usr/bin/env python
2 
3 from __future__ import print_function
4 import IMP.multifit
5 from IMP import ArgumentParser
6 
7 __doc__ = "Compare output models to a reference structure."
8 
9 # analyse the ensemble, first we will do the rmsd stuff
10 
11 
12 def get_placement_scores_from_coordinates(model_components_coords,
13  native_components_coords):
14  """
15  Computes the placement score for each of the components
16  @param model_components_coords A list with the coordinates for each
17  component
18  @param native_components_coords A list with the coordinates for each
19  component in the native assembly
20  """
21  distances = []
22  angles = []
23  for model_coords, native_coords in zip(
24  model_components_coords, native_components_coords):
25  distance, angle = get_placement_score_from_coordinates(model_coords,
26  native_coords)
27  distances.append(distance)
28  angles.append(angle)
29  return distances, angles
30 
31 
32 def get_placement_score_from_coordinates(model_coords, native_coords):
33  """
34  Computes the position error (placement distance) and the orientation
35  error (placement angle) of the coordinates in model_coords respect to
36  the coordinates in native_coords.
37  placement distance - translation between the centroids of the
38  coordinates
39  placement angle - Angle in the axis-angle formulation of the rotation
40  aligning the two rigid bodies.
41  """
42  native_centroid = IMP.algebra.get_centroid(native_coords)
43  model_centroid = IMP.algebra.get_centroid(model_coords)
44  translation_vector = native_centroid - model_centroid
45  distance = translation_vector.get_magnitude()
46  if (len(model_coords) != len(native_coords)):
47  raise ValueError(
48  "Mismatch in the number of members %d %d " % (
49  len(model_coords),
50  len(native_coords)))
52  native_coords)
53  P = IMP.algebra.get_axis_and_angle(TT.get_rotation())
54  angle = P.second
55  return distance, angle
56 
57 
58 def get_rmsd(hierarchy1, hierarchy2):
59  xyz1 = [IMP.core.XYZ(leaf) for leaf in IMP.atom.get_leaves(hierarchy1)]
60  xyz2 = [IMP.core.XYZ(leaf) for leaf in IMP.atom.get_leaves(hierarchy2)]
61  return IMP.atom.get_rmsd(xyz1, xyz2)
62 
63 
64 def get_components_placement_scores(assembly, native_assembly, align=False):
65  """
66  Compute the placement score of each of the children of an assembly.
67  @param assembly An atom.Molecule object
68  @param native_assembly An atom.Molecule object with the native
69  conformation. Obviously the atoms in assembly and
70  native_assembly must be the same.
71  @param align if True, the coordinates are aligned before the score is
72  calculated.
73  @return The function returns 2 lists. The first list contains the
74  placement distances of the children. The second list contains
75  the placement angles
76  """
77  model_coords_per_child = [get_coordinates(c) # noqa: F821
78  for c in assembly.get_children()]
79  native_coords_per_child = [get_coordinates(c) # noqa: F821
80  for c in native_assembly.get_children()]
81  if align:
82  model_coords = []
83  _ = [model_coords.extend(x) for x in model_coords_per_child]
84  native_coords = []
85  _ = [native_coords.extend(x) for x in native_coords_per_child]
87  model_coords, native_coords)
88  # get aligned coordinates
89  new_model_coords_per_child = []
90  for c in model_coords_per_child:
91  coords = [T.get_transformed(x) for x in c]
92  new_model_coords_per_child.append(coords)
93  model_coords_per_child = new_model_coords_per_child
94  distances, angles = get_placement_scores_from_coordinates(
95  native_coords_per_child, model_coords_per_child)
96  return distances, angles
97 
98 
99 def parse_args():
100  desc = """
101 Compare output models to a reference structure.
102 The reference structure for each subunit is read from the rightmost column
103 of the asmb.input file.
104 """
105  p = ArgumentParser(description=desc)
106  p.add_argument("-m", "--max", type=int, dest="max", default=None,
107  help="maximum number of models to compare")
108  p.add_argument("assembly_file", help="assembly file name")
109  p.add_argument("proteomics_file", help="proteomics file name")
110  p.add_argument("mapping_file", help="mapping file name")
111  p.add_argument("combinations_file", help="combinations file name")
112  return p.parse_args()
113 
114 
115 def run(asmb_fn, proteomics_fn, mapping_fn, combs_fn, max_comb):
116  # get rmsd for subunits
117  mdl = IMP.Model()
118  combs = IMP.multifit.read_paths(combs_fn)
119  sd = IMP.multifit.read_settings(asmb_fn)
120  sd.set_was_used(True)
121  prot_data = IMP.multifit.read_proteomics_data(proteomics_fn)
122  mapping_data = IMP.multifit.read_protein_anchors_mapping(prot_data,
123  mapping_fn)
124  ensmb = IMP.multifit.load_ensemble(sd, mdl, mapping_data)
125  ensmb.set_was_used(True)
126  mhs = ensmb.get_molecules()
127  mhs_ref = []
128  for j, mh in enumerate(mhs):
129  mhs_ref.append(
131  sd.get_component_header(
132  j).get_reference_fn(
133  ),
134  mdl))
135  print("number of combinations:", len(combs), max_comb)
136  results = []
137  for i, comb in enumerate(combs[:max_comb]):
138  if i % 500 == 0:
139  print(i)
140  ensmb.load_combination(comb)
141  scores = []
142  for j, mh in enumerate(mhs):
143  mh_ref = mhs_ref[j]
144  coords1 = []
145  for xyz in IMP.core.XYZs(IMP.core.get_leaves(mh)):
146  coords1.append(xyz.get_coordinates())
147  coords2 = []
148  for xyz in IMP.core.XYZs(IMP.core.get_leaves(mh_ref)):
149  coords2.append(xyz.get_coordinates())
150  scores.append(
151  get_placement_score_from_coordinates(coords1, coords2))
152  # scores=get_placement_score_from_coordinates(IMP.core.XYZs(IMP.atom.get_leaves(mh)),
153  # IMP.core.XYZs(IMP.atom.get_leaves(mh_ref)))
154 
155  rmsd = get_rmsd(mhs, mhs_ref)
156  print(i, rmsd, scores)
157  results.append((rmsd, scores))
158  ensmb.unload_combination(comb)
159  return results
160 
161 
162 def main():
163  args = parse_args()
164  return run(args.assembly_file, args.proteomics_file, args.mapping_file,
165  args.combinations_file, args.max)
166 
167 
168 if __name__ == "__main__":
169  main()
SettingsData * read_settings(const char *filename)
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
double get_rmsd(const Selection &s0, const Selection &s1)
Ensemble * load_ensemble(multifit::SettingsData *sd, Model *mdl, const ProteinsAnchorsSamplingSpace &mapping_data)
double get_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2)
Fitting atomic structures into a cryo-electron microscopy density map.
std::pair< Vector3D, double > get_axis_and_angle(const Rotation3D &rot)
Decompose a Rotation3D object into a rotation around an axis.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
Vector3D get_centroid(const Vector3Ds &ps)
Return the centroid of a set of vectors.
Definition: Vector3D.h:68
IntsList read_paths(const char *txt_filename, int max_paths=INT_MAX)
Read paths.
Transformation3D get_transformation_aligning_first_to_second(Vector3Ds a, Vector3Ds b)
Hierarchies get_leaves(const Selection &h)