IMP logo
IMP Reference Guide  2.15.0
The Integrative Modeling Platform
rmsd_calculation.py
1 from __future__ import print_function
2 import pyRMSD.RMSDCalculator
3 from pyRMSD.condensedMatrix import CondensedMatrix
4 
5 import numpy as np
6 import os
7 import glob
8 
9 import IMP
10 import IMP.atom
11 import IMP.rmf
12 import RMF
13 
14 
15 def parse_rmsd_selection(h, selection):
16  s0 = None
17  for idx, selected_range in enumerate(selection.values()):
18  # parse tuple selection in dictionary file for residue ranges
19  s = IMP.atom.Selection(h, resolution=1,
20  molecule=str(selected_range[0][2]),
21  residue_indexes=range(selected_range[0][0],
22  selected_range[0][1]))
23  if idx == 0:
24  s0 = s
25  else:
26  s0 |= s
27 
28  return s0
29 
30 
31 def get_pdbs_coordinates(path, idfile_A, idfile_B):
32  pts = []
33  conform = []
34  num = 0
35  masses = []
36  radii = []
37 
38  models_name = []
39 
40  with open(idfile_A, 'w+') as f1:
41  for str_file in sorted(
42  glob.glob("%s/sample_A/*.pdb" % path),
43  key=lambda x: int(x.split('/')[-1].split('.')[0])):
44  print(str_file, num, file=f1)
45  models_name.append(str_file)
46 
47  m = IMP.Model()
48  mh = IMP.atom.read_pdb(str_file, m,
50  mps = IMP.core.get_leaves(mh)
51  pts = [IMP.core.XYZ(p).get_coordinates() for p in mps]
52  if num == 0:
53  masses = [IMP.atom.Mass(p).get_mass() for p in mps]
54  radii = [IMP.core.XYZR(p).get_radius() for p in mps]
55  conform.append(pts)
56  pts = []
57  num = num + 1
58 
59  with open(idfile_B, 'w+') as f2:
60  for str_file in sorted(
61  glob.glob("%s/sample_B/*.pdb" % path),
62  key=lambda x: int(x.split('/')[-1].split('.')[0])):
63  print(str_file, num, file=f2)
64  models_name.append(str_file)
65 
66  m = IMP.Model()
67  mh = IMP.atom.read_pdb(str_file, m,
69  mps = IMP.core.get_leaves(mh)
70  pts = [IMP.core.XYZ(p).get_coordinates() for p in mps]
71  conform.append(pts)
72  pts = []
73  num = num + 1
74 
75  return np.array(conform), masses, radii, models_name
76 
77 
78 def get_rmfs_coordinates(path, idfile_A, idfile_B,
79  subunit_name=None, selection=None):
80 
81  conform = []
82  num = 0
83  masses = []
84  radii = []
85  ps_names = []
86 
87  f1 = open(idfile_A, 'w+')
88  f2 = open(idfile_B, 'w+')
89 
90  models_name = []
91 
92  for sample_name, sample_id_file in zip(['A', 'B'], [f1, f2]):
93 
94  for str_file in sorted(
95  glob.glob("%s/sample_%s/*.rmf3" % (path, sample_name)),
96  key=lambda x: int(x.split('/')[-1].split('.')[0])):
97  print(str_file, num, file=sample_id_file)
98  models_name.append(str_file)
99 
100  m = IMP.Model()
101  inf = RMF.open_rmf_file_read_only(str_file)
102  h = IMP.rmf.create_hierarchies(inf, m)[0]
103  IMP.rmf.load_frame(inf, 0)
104 
105  pts = []
106 
107  if subunit_name:
108  s0 = IMP.atom.Selection(h, resolution=1, molecule=subunit_name)
109  elif selection is not None:
110  s0 = parse_rmsd_selection(h, selection)
111  else:
112  s0 = IMP.atom.Selection(h, resolution=1)
113 
114  for leaf in s0.get_selected_particles():
115 
116  p = IMP.core.XYZR(leaf)
117  pts.append([p.get_coordinates()[i] for i in range(3)])
118 
119  if num == 0 and sample_name == 'A':
120  masses.append(IMP.atom.Mass(leaf).get_mass())
121  radii.append(p.get_radius())
122  mol_name = \
124  # traverse up the Hierarchy to get copy number of the
125  # molecule that the bead belongs to
126  copy_number = \
128 
130  # TODO not tested on non-fragment systems
131  residues_in_bead = \
132  IMP.atom.Fragment(leaf).get_residue_indexes()
133 
134  ps_names.append(
135  mol_name + "_" + str(min(residues_in_bead)) + "_"
136  + str(max(residues_in_bead)) + "_"
137  + str(copy_number))
138 
139  else:
140  residue_in_bead = \
141  str(IMP.atom.Residue(leaf).get_index())
142 
143  ps_names.append(mol_name + "_" + residue_in_bead + "_"
144  + residue_in_bead + "_"
145  + str(copy_number))
146 
147  conform.append(pts)
148  pts = []
149  num = num + 1
150  f1.close()
151  f2.close()
152 
153  return ps_names, masses, radii, np.array(conform), models_name
154 
155 
156 def parse_symmetric_groups_file(symm_groups_file):
157  symm_groups = []
158  member_to_symm_group = {}
159  first_group_member = []
160  curr_particle_index_in_group = []
161 
162  sgf = open(symm_groups_file, 'r')
163 
164  for indx, ln in enumerate(sgf.readlines()):
165 
166  symm_groups.append([]) # create new symm group list
167  # particle index for new symmetric group
168  curr_particle_index_in_group.append(-1)
169  fields = ln.strip().split()
170 
171  for fld in fields:
172  # group that the current protein copy belongs to
173  member_to_symm_group[fld] = indx
174 
175  # the first group member is special! We create a symm group list
176  # of particles for the first group member
177  first_group_member.append(fields[0])
178 
179  sgf.close()
180 
181  return (symm_groups, member_to_symm_group, curr_particle_index_in_group,
182  first_group_member)
183 
184 
185 def get_rmfs_coordinates_one_rmf(path, rmf_A, rmf_B, subunit_name=None,
186  symm_groups_file=None, selection=None):
187  '''Modified RMF coordinates function to work with symmetric copies'''
188 
189  # Open RMFs and get total number of models
190  rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_A))
191  n_models = [rmf_fh.get_number_of_frames()]
192  rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_B))
193  n_models.append(rmf_fh.get_number_of_frames())
194 
195  masses = []
196  radii = []
197  ps_names = []
198 
199  models_name = []
200 
201  # Build hierarchy from the RMF file
202  m = IMP.Model()
203  h = IMP.rmf.create_hierarchies(rmf_fh, m)[0]
204  IMP.rmf.load_frame(rmf_fh, 0)
205  m.update()
206 
207  ######
208  # Initialize output array
209  pts = 0 # number of particles in each model
210 
211  # Get selection
212  if subunit_name:
213  s0 = IMP.atom.Selection(h, resolution=1, molecule=subunit_name)
214  elif selection is not None:
215  s0 = parse_rmsd_selection(h, selection)
216  else:
217  s0 = IMP.atom.Selection(h, resolution=1)
218 
219  # Count particles
220  for leaf in s0.get_selected_particles():
221  p = IMP.core.XYZR(leaf)
222  pts += 1
223 
224  # Initialize array
225  conform = np.empty([n_models[0]+n_models[1], pts, 3])
226 
227  # Initialize the symmetric group particles list, and protein to
228  # symmetry group mapping
229  if symm_groups_file:
230  (symm_groups, group_member_to_symm_group_map,
231  curr_particle_index_in_group, first_group_member) = \
232  parse_symmetric_groups_file(symm_groups_file)
233  else:
234  symm_groups = None
235 
236  mod_id = 0 # index for each model in conform.
237 
238  for rmf_file in [rmf_A, rmf_B]:
239 
240  models_name.append(rmf_file)
241 
242  rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_file))
243  h = IMP.rmf.create_hierarchies(rmf_fh, m)[0]
244 
245  print("Opening RMF file:", rmf_file, "with",
246  rmf_fh.get_number_of_frames(), "frames")
247 
248  for f in range(rmf_fh.get_number_of_frames()):
249 
250  if f % 100 == 0:
251  # pass
252  print(" -- Opening frame", f, "of",
253  rmf_fh.get_number_of_frames())
254 
255  IMP.rmf.load_frame(rmf_fh, f)
256 
257  m.update()
258 
259  # Store particle indices and loop over individual protein
260  # names for symmetric copies
261  if subunit_name:
262  s0 = IMP.atom.Selection(h, resolution=1, molecule=subunit_name)
263  elif selection is not None:
264  s0 = parse_rmsd_selection(h, selection)
265  else:
266  s0 = IMP.atom.Selection(h, resolution=1)
267 
268  particles = s0.get_selected_particles()
269 
270  # Copy particle coordinates
271  for i in range(len(particles)):
272  # i is an index over all particles in the system
273 
274  leaf = particles[i]
275  p = IMP.core.XYZR(leaf)
276  pxyz = p.get_coordinates()
277  conform[mod_id][i][0] = pxyz[0]
278  conform[mod_id][i][1] = pxyz[1]
279  conform[mod_id][i][2] = pxyz[2]
280 
281  # Just for the first model, update the masses and radii
282  # and log the particle name in ps_names
283  if mod_id == 0 and rmf_file == rmf_A:
284  masses.append(IMP.atom.Mass(leaf).get_mass())
285  radii.append(p.get_radius())
286  mol_name = IMP.atom.get_molecule_name(
287  IMP.atom.Hierarchy(leaf))
288  # traverse up the Hierarchy to get copy number of
289  # the molecule that the bead belongs to
290  copy_number = \
292 
293  # Add to symmetric groups if needed
294  if symm_groups_file:
295 
296  protein_plus_copy = mol_name+'.'+str(copy_number)
297  if protein_plus_copy in group_member_to_symm_group_map:
298  # protein copy is in a symmetric group
299  group_index = group_member_to_symm_group_map[
300  protein_plus_copy]
301  curr_particle_index_in_group[group_index] += 1
302 
303  if protein_plus_copy \
304  == first_group_member[group_index]:
305  symm_groups[group_index].append([i])
306 
307  else:
308  j = curr_particle_index_in_group[group_index] \
309  % len(symm_groups[group_index])
310  symm_groups[group_index][j].append(i)
311 
312  # TODO not tested on non-fragment systems
314  residues_in_bead = \
315  IMP.atom.Fragment(leaf).get_residue_indexes()
316  ps_names.append(
317  mol_name + "_" + str(min(residues_in_bead)) + "_"
318  + str(max(residues_in_bead)) + "_"
319  + str(copy_number))
320  else:
321  residue_in_bead = \
322  str(IMP.atom.Residue(leaf).get_index())
323  ps_names.append(
324  mol_name + "_" + residue_in_bead + "_" +
325  residue_in_bead + "_" + str(copy_number))
326  mod_id += 1
327 
328  return ps_names, masses, radii, conform, symm_groups, models_name, n_models
329 
330 
331 def get_rmsds_matrix(conforms, mode, sup, cores, symm_groups=None):
332 
333  if (mode == "cpu_serial" and not sup) or (mode == "cpu_omp" and not sup):
334  calculator_name = "NOSUP_OMP_CALCULATOR"
335 
336  elif mode == "cpu_omp" and sup:
337  calculator_name = "QCP_OMP_CALCULATOR"
338  print("we are using QCP_OMP to compute RMSD")
339  elif mode == "cuda" and sup:
340  calculator_name = "QCP_CUDA_MEM_CALCULATOR"
341  else:
342  print("Wrong values to pyRMSD ! Please Fix")
343  exit()
344 
345  if symm_groups:
346  print("We have ambiguity.")
347  calculator = pyRMSD.RMSDCalculator.RMSDCalculator(
348  calculator_name,
349  fittingCoordsets=conforms,
350  calcSymmetryGroups=symm_groups,
351  fitSymmetryGroups=symm_groups)
352 
353  else:
354  calculator = pyRMSD.RMSDCalculator.RMSDCalculator(
355  calculator_name, conforms)
356 
357  # additionally set number of cores for parallel calculator
358  if mode == "cpu_omp":
359  calculator.setNumberOfOpenMPThreads(int(cores))
360 
361  rmsd = calculator.pairwiseRMSDMatrix()
362  rmsd_matrix = CondensedMatrix(rmsd)
363  inner_data = rmsd_matrix.get_data()
364  np.save("Distances_Matrix.data", inner_data)
365 
366  return inner_data
Select non water and non hydrogen atoms.
Definition: pdb.h:245
Add mass to a particle.
Definition: Mass.h:23
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
double get_mass(ResidueType c)
Get the mass from the residue type.
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
std::string get_molecule_name(Hierarchy h)
A decorator for a residue.
Definition: Residue.h:135
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Support for the RMF file format for storing hierarchical molecular data and markup.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27