IMP  2.1.1
The Integrative Modeling Platform
reference.py
1 #!/usr/bin/env python
2 
3 __doc__ = "Compare output models to a reference structure."
4 
5 #analyse the ensemble, first we will do the rmsd stuff
6 import IMP.multifit
7 from IMP import OptionParser
8 
9 
10 def get_placement_scores_from_coordinates(model_components_coords,
11  native_components_coords):
12  """
13  Computes the placement score for each of the components
14  @param model_components_coords A list with the coordinates for each
15  component
16  @param native_components_coords A list with the coordinates for each
17  component in the native assembly
18  """
19  distances = []
20  angles = []
21  for model_coords, native_coords in zip(
22  model_components_coords,native_components_coords):
23  distance, angle = get_placement_score_from_coordinates(model_coords,
24  native_coords)
25  distances.append(distance)
26  angles.append(angle)
27  return distances, angles
28 
29 
30 def get_placement_score_from_coordinates(model_coords, native_coords):
31  """
32  Computes the position error (placement distance) and the orientation
33  error (placement angle) of the coordinates in model_coords respect to
34  the coordinates in native_coords.
35  placement distance - translation between the centroids of the
36  coordinates
37  placement angle - Angle in the axis-angle formulation of the rotation
38  aligning the two rigid bodies.
39  """
40  native_centroid = IMP.algebra.get_centroid(native_coords)
41  model_centroid = IMP.algebra.get_centroid(model_coords)
42  translation_vector = native_centroid - model_centroid
43  distance = translation_vector.get_magnitude()
44  if(len(model_coords) != len(native_coords) ):
45  raise ValueError(
46  "Mismatch in the number of members %d %d " % (
47  len(model_coords),
48  len(native_coords)) )
50  native_coords)
51  P = IMP.algebra.get_axis_and_angle( TT.get_rotation() )
52  angle = P.second
53  return distance, angle
54 
55 
56 def get_rmsd(hierarchy1, hierarchy2):
57  xyz1 = [IMP.core.XYZ(l) for l in IMP.atom.get_leaves(hierarchy1)]
58  xyz2 = [IMP.core.XYZ(l) for l in IMP.atom.get_leaves(hierarchy2)]
59  return IMP.atom.get_rmsd(xyz1, xyz2)
60 
61 
62 
63 def get_components_placement_scores(assembly, native_assembly, align=False):
64  """
65  Compute the placement score of each of the children of an assembly.
66  @param assembly An atom.Molecule object
67  @param native_assembly An atom.Molecule object with the native
68  conformation. Obviously the atoms in assembly and
69  native_assembly must be the same.
70  @param align if True, the coordinates are aligned before the score is
71  calculated.
72  @return The function returns 2 lists. The first list contains the
73  placement distances of the children. The second list contains
74  the placement angles
75  """
76  model_coords_per_child = [get_coordinates(c) for c in assembly.get_children()]
77  native_coords_per_child = [get_coordinates(c) for c in native_assembly.get_children()]
78  if align:
79  model_coords = []
80  nil = [model_coords.extend(x) for x in model_coords_per_child]
81  native_coords = []
82  nil = [native_coords.extend(x) for x in native_coords_per_child]
83  T = alg.get_transformation_aligning_first_to_second(model_coords,
84  native_coords)
85  # get aligned coordinates
86  new_model_coords_per_child = []
87  for c in model_coords_per_child:
88  coords = [T.get_transformed(x) for x in c]
89  new_model_coords_per_child.append(coords)
90  model_coords_per_child = new_model_coords_per_child
91  distances, angles = get_placement_scores_from_coordinates(
92  native_coords_per_child, model_coords_per_child)
93  return distances, angles
94 
95 
96 def parse_args():
97  usage = """%prog [options] <asmb.input> <proteomics.input>
98  <mapping.input> <combinations>
99 
100 Compare output models to a reference structure.
101 The reference structure for each subunit is read from the rightmost column
102 of the asmb.input file.
103 """
104  parser = OptionParser(usage)
105  parser.add_option("-m", "--max", type="int", dest="max", default=None,
106  help="maximum number of models to compare")
107  (options, args) = parser.parse_args()
108  if len(args) != 4:
109  parser.error("incorrect number of arguments")
110  return options,args
111 
112 def run(asmb_fn,proteomics_fn,mapping_fn,combs_fn,max_comb):
113  #get rmsd for subunits
114  mdl=IMP.kernel.Model()
115  combs=IMP.multifit.read_paths(combs_fn)
116  sd=IMP.multifit.read_settings(asmb_fn)
117  sd.set_was_used(True)
118  prot_data=IMP.multifit.read_proteomics_data(proteomics_fn)
119  mapping_data=IMP.multifit.read_protein_anchors_mapping(prot_data,
120  mapping_fn)
121  ensmb=IMP.multifit.load_ensemble(sd,mdl,mapping_data)
122  ensmb.set_was_used(True)
123  mhs=ensmb.get_molecules()
124  mhs_ref=[]
125  for j,mh in enumerate(mhs):
126  mhs_ref.append(IMP.atom.read_pdb(sd.get_component_header(j).get_reference_fn(),mdl))
127  print "number of combinations:",len(combs),max_comb
128  results = []
129  for i,comb in enumerate(combs[:max_comb]):
130  if i%500==0:
131  print i
132  ensmb.load_combination(comb)
133  scores=[]
134  for j,mh in enumerate(mhs):
135  mh_ref=mhs_ref[j]
136  coords1=[]
137  for xyz in IMP.core.XYZs(IMP.core.get_leaves(mh)):
138  coords1.append(xyz.get_coordinates())
139  coords2=[]
140  for xyz in IMP.core.XYZs(IMP.core.get_leaves(mh_ref)):
141  coords2.append(xyz.get_coordinates())
142  scores.append(get_placement_score_from_coordinates(coords1,coords2))
143  #scores=get_placement_score_from_coordinates(IMP.core.XYZs(IMP.atom.get_leaves(mh)),
144  # IMP.core.XYZs(IMP.atom.get_leaves(mh_ref)))
145 
146  rmsd=get_rmsd(mhs,mhs_ref)
147  print i,rmsd,scores
148  results.append((rmsd, scores))
149  ensmb.unload_combination(comb)
150  return results
151 
152 def main():
153  options,args = parse_args()
154  return run(args[0],args[1],args[2],args[3],options.max)
155 
156 if __name__ == "__main__":
157  main()
double get_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2, const IMP::algebra::Transformation3D &tr_for_second=IMP::algebra::get_identity_transformation_3d())
Calculate the root mean square deviation between two sets of 3D points.
Definition: distance.h:28
SettingsData * read_settings(const char *filename)
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
double get_rmsd(const Selection &s0, const Selection &s1, const IMP::algebra::Transformation3D &tr_for_second=IMP::algebra::get_identity_transformation_3d())
Definition: distance.h:47
Ensemble * load_ensemble(multifit::SettingsData *sd, Model *mdl, const ProteinsAnchorsSamplingSpace &mapping_data)
See IMP.multifit for more information.
std::pair< Vector3D, double > get_axis_and_angle(const Rotation3D &rot)
Decompose a Rotation3D object into a rotation around an axis.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:32
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
IMP::kernel::OptionParser OptionParser
Vector3D get_centroid(const Vector3Ds &ps)
Returns the centroid of a set of vectors.
Definition: Vector3D.h:56
IntsList read_paths(const char *txt_filename, int max_paths=INT_MAX)
Read paths.
Transformation3D get_transformation_aligning_first_to_second(Vector3Ds a, Vector3Ds b)
void read_pdb(base::TextInput input, int model, Hierarchy h)
Hierarchies get_leaves(const Selection &h)
Class for storing model, its restraints, constraints, and particles.